summary | refs | log | tree | commit | diff
path: root/thirdparty/pcre2/src/pcre2_tables.c
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_tables.c')
-rw-r--r-- thirdparty/pcre2/src/pcre2_tables.c | 378
1 files changed, 196 insertions, 182 deletions
diff --git a/thirdparty/pcre2/src/pcre2_tables.c b/thirdparty/pcre2/src/pcre2_tables.c
index 9f8dc293aa..83d6f9de55 100644
--- a/thirdparty/pcre2/src/pcre2_tables.c
+++ b/thirdparty/pcre2/src/pcre2_tables.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -137,9 +137,10 @@ const uint32_t PRIV(ucp_gentype)[] = {
/* This table encodes the rules for finding the end of an extended grapheme
cluster. Every code point has a grapheme break property which is one of the
-ucp_gbXX values defined in pcre2_ucp.h. The 2-dimensional table is indexed by
-the properties of two adjacent code points. The left property selects a word
-from the table, and the right property selects a bit from that word like this:
+ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions
+10 and 11. The 2-dimensional table is indexed by the properties of two adjacent
+code points. The left property selects a word from the table, and the right
+property selects a bit from that word like this:
PRIV(ucp_gbtable)[left-property] & (1 << right-property)
@@ -166,49 +167,41 @@ are implementing).
6. Do not break after Prepend characters.
-7. Do not break within emoji modifier sequences (E_Base or E_Base_GAZ followed
- by E_Modifier). Extend characters are allowed before the modifier; this
- cannot be represented in this table, the code has to deal with it.
+7. Do not break within emoji modifier sequences or emoji zwj sequences. That
+ is, do not break between characters with the Extended_Pictographic property.
+ Extend and ZWJ characters are allowed between the characters; this cannot be
+ represented in this table, the code has to deal with it.
-8. Do not break within emoji zwj sequences (ZWJ followed by Glue_After_Zwj or
- E_Base_GAZ).
-
-9. Do not break within emoji flag sequences. That is, do not break between
+8. Do not break within emoji flag sequences. That is, do not break between
regional indicator (RI) symbols if there are an odd number of RI characters
before the break point. This table encodes "join RI characters"; the code
has to deal with checking for previous adjoining RIs.
-10. Otherwise, break everywhere.
+9. Otherwise, break everywhere.
*/
#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
const uint32_t PRIV(ucp_gbtable)[] = {
- (1<<ucp_gbLF), /* 0 CR */
- 0, /* 1 LF */
- 0, /* 2 Control */
- ESZ, /* 3 Extend */
- ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */
+ (1<<ucp_gbLF), /* 0 CR */
+ 0, /* 1 LF */
+ 0, /* 2 Control */
+ ESZ, /* 3 Extend */
+ ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */
(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbT)|
(1<<ucp_gbLV)|(1<<ucp_gbLVT)|(1<<ucp_gbOther)|
- (1<<ucp_gbRegionalIndicator)|
- (1<<ucp_gbE_Base)|(1<<ucp_gbE_Modifier)|
- (1<<ucp_gbE_Base_GAZ)|
- (1<<ucp_gbZWJ)|(1<<ucp_gbGlue_After_Zwj),
- ESZ, /* 5 SpacingMark */
- ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */
+ (1<<ucp_gbRegionalIndicator),
+ ESZ, /* 5 SpacingMark */
+ ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */
(1<<ucp_gbLVT),
- ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 7 V */
- ESZ|(1<<ucp_gbT), /* 8 T */
- ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 9 LV */
- ESZ|(1<<ucp_gbT), /* 10 LVT */
- (1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
- ESZ, /* 12 Other */
- ESZ|(1<<ucp_gbE_Modifier), /* 13 E_Base */
- ESZ, /* 14 E_Modifier */
- ESZ|(1<<ucp_gbE_Modifier), /* 15 E_Base_GAZ */
- ESZ|(1<<ucp_gbGlue_After_Zwj)|(1<<ucp_gbE_Base_GAZ), /* 16 ZWJ */
- ESZ /* 12 Glue_After_Zwj */
+ ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 7 V */
+ ESZ|(1<<ucp_gbT), /* 8 T */
+ ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 9 LV */
+ ESZ|(1<<ucp_gbT), /* 10 LVT */
+ (1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
+ ESZ, /* 12 Other */
+ ESZ, /* 13 ZWJ */
+ ESZ|(1<<ucp_gbExtended_Pictographic) /* 14 Extended Pictographic */
};
#undef ESZ
@@ -282,6 +275,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
+#define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0"
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
@@ -292,9 +286,11 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
+#define STRING_Gunjala_Gondi0 STR_G STR_u STR_n STR_j STR_a STR_l STR_a STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
#define STRING_Han0 STR_H STR_a STR_n "\0"
#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
+#define STRING_Hanifi_Rohingya0 STR_H STR_a STR_n STR_i STR_f STR_i STR_UNDERSCORE STR_R STR_o STR_h STR_i STR_n STR_g STR_y STR_a "\0"
#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
#define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0"
#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
@@ -330,6 +326,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
#define STRING_M0 STR_M "\0"
#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
+#define STRING_Makasar0 STR_M STR_a STR_k STR_a STR_s STR_a STR_r "\0"
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
@@ -337,6 +334,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Masaram_Gondi0 STR_M STR_a STR_s STR_a STR_r STR_a STR_m STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
#define STRING_Mc0 STR_M STR_c "\0"
#define STRING_Me0 STR_M STR_e "\0"
+#define STRING_Medefaidrin0 STR_M STR_e STR_d STR_e STR_f STR_a STR_i STR_d STR_r STR_i STR_n "\0"
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
@@ -364,6 +362,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
+#define STRING_Old_Sogdian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0"
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
@@ -397,6 +396,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Sk0 STR_S STR_k "\0"
#define STRING_Sm0 STR_S STR_m "\0"
#define STRING_So0 STR_S STR_o "\0"
+#define STRING_Sogdian0 STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0"
#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
#define STRING_Soyombo0 STR_S STR_o STR_y STR_o STR_m STR_b STR_o "\0"
#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
@@ -469,6 +469,7 @@ const char PRIV(utt_names)[] =
STRING_Cyrillic0
STRING_Deseret0
STRING_Devanagari0
+ STRING_Dogra0
STRING_Duployan0
STRING_Egyptian_Hieroglyphs0
STRING_Elbasan0
@@ -479,9 +480,11 @@ const char PRIV(utt_names)[] =
STRING_Grantha0
STRING_Greek0
STRING_Gujarati0
+ STRING_Gunjala_Gondi0
STRING_Gurmukhi0
STRING_Han0
STRING_Hangul0
+ STRING_Hanifi_Rohingya0
STRING_Hanunoo0
STRING_Hatran0
STRING_Hebrew0
@@ -517,6 +520,7 @@ const char PRIV(utt_names)[] =
STRING_Lydian0
STRING_M0
STRING_Mahajani0
+ STRING_Makasar0
STRING_Malayalam0
STRING_Mandaic0
STRING_Manichaean0
@@ -524,6 +528,7 @@ const char PRIV(utt_names)[] =
STRING_Masaram_Gondi0
STRING_Mc0
STRING_Me0
+ STRING_Medefaidrin0
STRING_Meetei_Mayek0
STRING_Mende_Kikakui0
STRING_Meroitic_Cursive0
@@ -551,6 +556,7 @@ const char PRIV(utt_names)[] =
STRING_Old_North_Arabian0
STRING_Old_Permic0
STRING_Old_Persian0
+ STRING_Old_Sogdian0
STRING_Old_South_Arabian0
STRING_Old_Turkic0
STRING_Oriya0
@@ -584,6 +590,7 @@ const char PRIV(utt_names)[] =
STRING_Sk0
STRING_Sm0
STRING_So0
+ STRING_Sogdian0
STRING_Sora_Sompeng0
STRING_Soyombo0
STRING_Sundanese0
@@ -656,154 +663,161 @@ const ucp_type_table PRIV(utt)[] = {
{ 265, PT_SC, ucp_Cyrillic },
{ 274, PT_SC, ucp_Deseret },
{ 282, PT_SC, ucp_Devanagari },
- { 293, PT_SC, ucp_Duployan },
- { 302, PT_SC, ucp_Egyptian_Hieroglyphs },
- { 323, PT_SC, ucp_Elbasan },
- { 331, PT_SC, ucp_Ethiopic },
- { 340, PT_SC, ucp_Georgian },
- { 349, PT_SC, ucp_Glagolitic },
- { 360, PT_SC, ucp_Gothic },
- { 367, PT_SC, ucp_Grantha },
- { 375, PT_SC, ucp_Greek },
- { 381, PT_SC, ucp_Gujarati },
- { 390, PT_SC, ucp_Gurmukhi },
- { 399, PT_SC, ucp_Han },
- { 403, PT_SC, ucp_Hangul },
- { 410, PT_SC, ucp_Hanunoo },
- { 418, PT_SC, ucp_Hatran },
- { 425, PT_SC, ucp_Hebrew },
- { 432, PT_SC, ucp_Hiragana },
- { 441, PT_SC, ucp_Imperial_Aramaic },
- { 458, PT_SC, ucp_Inherited },
- { 468, PT_SC, ucp_Inscriptional_Pahlavi },
- { 490, PT_SC, ucp_Inscriptional_Parthian },
- { 513, PT_SC, ucp_Javanese },
- { 522, PT_SC, ucp_Kaithi },
- { 529, PT_SC, ucp_Kannada },
- { 537, PT_SC, ucp_Katakana },
- { 546, PT_SC, ucp_Kayah_Li },
- { 555, PT_SC, ucp_Kharoshthi },
- { 566, PT_SC, ucp_Khmer },
- { 572, PT_SC, ucp_Khojki },
- { 579, PT_SC, ucp_Khudawadi },
- { 589, PT_GC, ucp_L },
- { 591, PT_LAMP, 0 },
- { 594, PT_SC, ucp_Lao },
- { 598, PT_SC, ucp_Latin },
- { 604, PT_SC, ucp_Lepcha },
- { 611, PT_SC, ucp_Limbu },
- { 617, PT_SC, ucp_Linear_A },
- { 626, PT_SC, ucp_Linear_B },
- { 635, PT_SC, ucp_Lisu },
- { 640, PT_PC, ucp_Ll },
- { 643, PT_PC, ucp_Lm },
- { 646, PT_PC, ucp_Lo },
- { 649, PT_PC, ucp_Lt },
- { 652, PT_PC, ucp_Lu },
- { 655, PT_SC, ucp_Lycian },
- { 662, PT_SC, ucp_Lydian },
- { 669, PT_GC, ucp_M },
- { 671, PT_SC, ucp_Mahajani },
- { 680, PT_SC, ucp_Malayalam },
- { 690, PT_SC, ucp_Mandaic },
- { 698, PT_SC, ucp_Manichaean },
- { 709, PT_SC, ucp_Marchen },
- { 717, PT_SC, ucp_Masaram_Gondi },
- { 731, PT_PC, ucp_Mc },
- { 734, PT_PC, ucp_Me },
- { 737, PT_SC, ucp_Meetei_Mayek },
- { 750, PT_SC, ucp_Mende_Kikakui },
- { 764, PT_SC, ucp_Meroitic_Cursive },
- { 781, PT_SC, ucp_Meroitic_Hieroglyphs },
- { 802, PT_SC, ucp_Miao },
- { 807, PT_PC, ucp_Mn },
- { 810, PT_SC, ucp_Modi },
- { 815, PT_SC, ucp_Mongolian },
- { 825, PT_SC, ucp_Mro },
- { 829, PT_SC, ucp_Multani },
- { 837, PT_SC, ucp_Myanmar },
- { 845, PT_GC, ucp_N },
- { 847, PT_SC, ucp_Nabataean },
- { 857, PT_PC, ucp_Nd },
- { 860, PT_SC, ucp_New_Tai_Lue },
- { 872, PT_SC, ucp_Newa },
- { 877, PT_SC, ucp_Nko },
- { 881, PT_PC, ucp_Nl },
- { 884, PT_PC, ucp_No },
- { 887, PT_SC, ucp_Nushu },
- { 893, PT_SC, ucp_Ogham },
- { 899, PT_SC, ucp_Ol_Chiki },
- { 908, PT_SC, ucp_Old_Hungarian },
- { 922, PT_SC, ucp_Old_Italic },
- { 933, PT_SC, ucp_Old_North_Arabian },
- { 951, PT_SC, ucp_Old_Permic },
- { 962, PT_SC, ucp_Old_Persian },
- { 974, PT_SC, ucp_Old_South_Arabian },
- { 992, PT_SC, ucp_Old_Turkic },
- { 1003, PT_SC, ucp_Oriya },
- { 1009, PT_SC, ucp_Osage },
- { 1015, PT_SC, ucp_Osmanya },
- { 1023, PT_GC, ucp_P },
- { 1025, PT_SC, ucp_Pahawh_Hmong },
- { 1038, PT_SC, ucp_Palmyrene },
- { 1048, PT_SC, ucp_Pau_Cin_Hau },
- { 1060, PT_PC, ucp_Pc },
- { 1063, PT_PC, ucp_Pd },
- { 1066, PT_PC, ucp_Pe },
- { 1069, PT_PC, ucp_Pf },
- { 1072, PT_SC, ucp_Phags_Pa },
- { 1081, PT_SC, ucp_Phoenician },
- { 1092, PT_PC, ucp_Pi },
- { 1095, PT_PC, ucp_Po },
- { 1098, PT_PC, ucp_Ps },
- { 1101, PT_SC, ucp_Psalter_Pahlavi },
- { 1117, PT_SC, ucp_Rejang },
- { 1124, PT_SC, ucp_Runic },
- { 1130, PT_GC, ucp_S },
- { 1132, PT_SC, ucp_Samaritan },
- { 1142, PT_SC, ucp_Saurashtra },
- { 1153, PT_PC, ucp_Sc },
- { 1156, PT_SC, ucp_Sharada },
- { 1164, PT_SC, ucp_Shavian },
- { 1172, PT_SC, ucp_Siddham },
- { 1180, PT_SC, ucp_SignWriting },
- { 1192, PT_SC, ucp_Sinhala },
- { 1200, PT_PC, ucp_Sk },
- { 1203, PT_PC, ucp_Sm },
- { 1206, PT_PC, ucp_So },
- { 1209, PT_SC, ucp_Sora_Sompeng },
- { 1222, PT_SC, ucp_Soyombo },
- { 1230, PT_SC, ucp_Sundanese },
- { 1240, PT_SC, ucp_Syloti_Nagri },
- { 1253, PT_SC, ucp_Syriac },
- { 1260, PT_SC, ucp_Tagalog },
- { 1268, PT_SC, ucp_Tagbanwa },
- { 1277, PT_SC, ucp_Tai_Le },
- { 1284, PT_SC, ucp_Tai_Tham },
- { 1293, PT_SC, ucp_Tai_Viet },
- { 1302, PT_SC, ucp_Takri },
- { 1308, PT_SC, ucp_Tamil },
- { 1314, PT_SC, ucp_Tangut },
- { 1321, PT_SC, ucp_Telugu },
- { 1328, PT_SC, ucp_Thaana },
- { 1335, PT_SC, ucp_Thai },
- { 1340, PT_SC, ucp_Tibetan },
- { 1348, PT_SC, ucp_Tifinagh },
- { 1357, PT_SC, ucp_Tirhuta },
- { 1365, PT_SC, ucp_Ugaritic },
- { 1374, PT_SC, ucp_Vai },
- { 1378, PT_SC, ucp_Warang_Citi },
- { 1390, PT_ALNUM, 0 },
- { 1394, PT_PXSPACE, 0 },
- { 1398, PT_SPACE, 0 },
- { 1402, PT_UCNC, 0 },
- { 1406, PT_WORD, 0 },
- { 1410, PT_SC, ucp_Yi },
- { 1413, PT_GC, ucp_Z },
- { 1415, PT_SC, ucp_Zanabazar_Square },
- { 1432, PT_PC, ucp_Zl },
- { 1435, PT_PC, ucp_Zp },
- { 1438, PT_PC, ucp_Zs }
+ { 293, PT_SC, ucp_Dogra },
+ { 299, PT_SC, ucp_Duployan },
+ { 308, PT_SC, ucp_Egyptian_Hieroglyphs },
+ { 329, PT_SC, ucp_Elbasan },
+ { 337, PT_SC, ucp_Ethiopic },
+ { 346, PT_SC, ucp_Georgian },
+ { 355, PT_SC, ucp_Glagolitic },
+ { 366, PT_SC, ucp_Gothic },
+ { 373, PT_SC, ucp_Grantha },
+ { 381, PT_SC, ucp_Greek },
+ { 387, PT_SC, ucp_Gujarati },
+ { 396, PT_SC, ucp_Gunjala_Gondi },
+ { 410, PT_SC, ucp_Gurmukhi },
+ { 419, PT_SC, ucp_Han },
+ { 423, PT_SC, ucp_Hangul },
+ { 430, PT_SC, ucp_Hanifi_Rohingya },
+ { 446, PT_SC, ucp_Hanunoo },
+ { 454, PT_SC, ucp_Hatran },
+ { 461, PT_SC, ucp_Hebrew },
+ { 468, PT_SC, ucp_Hiragana },
+ { 477, PT_SC, ucp_Imperial_Aramaic },
+ { 494, PT_SC, ucp_Inherited },
+ { 504, PT_SC, ucp_Inscriptional_Pahlavi },
+ { 526, PT_SC, ucp_Inscriptional_Parthian },
+ { 549, PT_SC, ucp_Javanese },
+ { 558, PT_SC, ucp_Kaithi },
+ { 565, PT_SC, ucp_Kannada },
+ { 573, PT_SC, ucp_Katakana },
+ { 582, PT_SC, ucp_Kayah_Li },
+ { 591, PT_SC, ucp_Kharoshthi },
+ { 602, PT_SC, ucp_Khmer },
+ { 608, PT_SC, ucp_Khojki },
+ { 615, PT_SC, ucp_Khudawadi },
+ { 625, PT_GC, ucp_L },
+ { 627, PT_LAMP, 0 },
+ { 630, PT_SC, ucp_Lao },
+ { 634, PT_SC, ucp_Latin },
+ { 640, PT_SC, ucp_Lepcha },
+ { 647, PT_SC, ucp_Limbu },
+ { 653, PT_SC, ucp_Linear_A },
+ { 662, PT_SC, ucp_Linear_B },
+ { 671, PT_SC, ucp_Lisu },
+ { 676, PT_PC, ucp_Ll },
+ { 679, PT_PC, ucp_Lm },
+ { 682, PT_PC, ucp_Lo },
+ { 685, PT_PC, ucp_Lt },
+ { 688, PT_PC, ucp_Lu },
+ { 691, PT_SC, ucp_Lycian },
+ { 698, PT_SC, ucp_Lydian },
+ { 705, PT_GC, ucp_M },
+ { 707, PT_SC, ucp_Mahajani },
+ { 716, PT_SC, ucp_Makasar },
+ { 724, PT_SC, ucp_Malayalam },
+ { 734, PT_SC, ucp_Mandaic },
+ { 742, PT_SC, ucp_Manichaean },
+ { 753, PT_SC, ucp_Marchen },
+ { 761, PT_SC, ucp_Masaram_Gondi },
+ { 775, PT_PC, ucp_Mc },
+ { 778, PT_PC, ucp_Me },
+ { 781, PT_SC, ucp_Medefaidrin },
+ { 793, PT_SC, ucp_Meetei_Mayek },
+ { 806, PT_SC, ucp_Mende_Kikakui },
+ { 820, PT_SC, ucp_Meroitic_Cursive },
+ { 837, PT_SC, ucp_Meroitic_Hieroglyphs },
+ { 858, PT_SC, ucp_Miao },
+ { 863, PT_PC, ucp_Mn },
+ { 866, PT_SC, ucp_Modi },
+ { 871, PT_SC, ucp_Mongolian },
+ { 881, PT_SC, ucp_Mro },
+ { 885, PT_SC, ucp_Multani },
+ { 893, PT_SC, ucp_Myanmar },
+ { 901, PT_GC, ucp_N },
+ { 903, PT_SC, ucp_Nabataean },
+ { 913, PT_PC, ucp_Nd },
+ { 916, PT_SC, ucp_New_Tai_Lue },
+ { 928, PT_SC, ucp_Newa },
+ { 933, PT_SC, ucp_Nko },
+ { 937, PT_PC, ucp_Nl },
+ { 940, PT_PC, ucp_No },
+ { 943, PT_SC, ucp_Nushu },
+ { 949, PT_SC, ucp_Ogham },
+ { 955, PT_SC, ucp_Ol_Chiki },
+ { 964, PT_SC, ucp_Old_Hungarian },
+ { 978, PT_SC, ucp_Old_Italic },
+ { 989, PT_SC, ucp_Old_North_Arabian },
+ { 1007, PT_SC, ucp_Old_Permic },
+ { 1018, PT_SC, ucp_Old_Persian },
+ { 1030, PT_SC, ucp_Old_Sogdian },
+ { 1042, PT_SC, ucp_Old_South_Arabian },
+ { 1060, PT_SC, ucp_Old_Turkic },
+ { 1071, PT_SC, ucp_Oriya },
+ { 1077, PT_SC, ucp_Osage },
+ { 1083, PT_SC, ucp_Osmanya },
+ { 1091, PT_GC, ucp_P },
+ { 1093, PT_SC, ucp_Pahawh_Hmong },
+ { 1106, PT_SC, ucp_Palmyrene },
+ { 1116, PT_SC, ucp_Pau_Cin_Hau },
+ { 1128, PT_PC, ucp_Pc },
+ { 1131, PT_PC, ucp_Pd },
+ { 1134, PT_PC, ucp_Pe },
+ { 1137, PT_PC, ucp_Pf },
+ { 1140, PT_SC, ucp_Phags_Pa },
+ { 1149, PT_SC, ucp_Phoenician },
+ { 1160, PT_PC, ucp_Pi },
+ { 1163, PT_PC, ucp_Po },
+ { 1166, PT_PC, ucp_Ps },
+ { 1169, PT_SC, ucp_Psalter_Pahlavi },
+ { 1185, PT_SC, ucp_Rejang },
+ { 1192, PT_SC, ucp_Runic },
+ { 1198, PT_GC, ucp_S },
+ { 1200, PT_SC, ucp_Samaritan },
+ { 1210, PT_SC, ucp_Saurashtra },
+ { 1221, PT_PC, ucp_Sc },
+ { 1224, PT_SC, ucp_Sharada },
+ { 1232, PT_SC, ucp_Shavian },
+ { 1240, PT_SC, ucp_Siddham },
+ { 1248, PT_SC, ucp_SignWriting },
+ { 1260, PT_SC, ucp_Sinhala },
+ { 1268, PT_PC, ucp_Sk },
+ { 1271, PT_PC, ucp_Sm },
+ { 1274, PT_PC, ucp_So },
+ { 1277, PT_SC, ucp_Sogdian },
+ { 1285, PT_SC, ucp_Sora_Sompeng },
+ { 1298, PT_SC, ucp_Soyombo },
+ { 1306, PT_SC, ucp_Sundanese },
+ { 1316, PT_SC, ucp_Syloti_Nagri },
+ { 1329, PT_SC, ucp_Syriac },
+ { 1336, PT_SC, ucp_Tagalog },
+ { 1344, PT_SC, ucp_Tagbanwa },
+ { 1353, PT_SC, ucp_Tai_Le },
+ { 1360, PT_SC, ucp_Tai_Tham },
+ { 1369, PT_SC, ucp_Tai_Viet },
+ { 1378, PT_SC, ucp_Takri },
+ { 1384, PT_SC, ucp_Tamil },
+ { 1390, PT_SC, ucp_Tangut },
+ { 1397, PT_SC, ucp_Telugu },
+ { 1404, PT_SC, ucp_Thaana },
+ { 1411, PT_SC, ucp_Thai },
+ { 1416, PT_SC, ucp_Tibetan },
+ { 1424, PT_SC, ucp_Tifinagh },
+ { 1433, PT_SC, ucp_Tirhuta },
+ { 1441, PT_SC, ucp_Ugaritic },
+ { 1450, PT_SC, ucp_Vai },
+ { 1454, PT_SC, ucp_Warang_Citi },
+ { 1466, PT_ALNUM, 0 },
+ { 1470, PT_PXSPACE, 0 },
+ { 1474, PT_SPACE, 0 },
+ { 1478, PT_UCNC, 0 },
+ { 1482, PT_WORD, 0 },
+ { 1486, PT_SC, ucp_Yi },
+ { 1489, PT_GC, ucp_Z },
+ { 1491, PT_SC, ucp_Zanabazar_Square },
+ { 1508, PT_PC, ucp_Zl },
+ { 1511, PT_PC, ucp_Zp },
+ { 1514, PT_PC, ucp_Zs }
};
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);