diff options
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_tables.c')
-rw-r--r-- | thirdparty/pcre2/src/pcre2_tables.c | 448 |
1 files changed, 248 insertions, 200 deletions
diff --git a/thirdparty/pcre2/src/pcre2_tables.c b/thirdparty/pcre2/src/pcre2_tables.c index b945ed7a7f..9f8dc293aa 100644 --- a/thirdparty/pcre2/src/pcre2_tables.c +++ b/thirdparty/pcre2/src/pcre2_tables.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2016-2017 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -39,7 +39,7 @@ POSSIBILITY OF SUCH DAMAGE. */ /* This module contains some fixed tables that are used by more than one of the -PCRE code modules. The tables are also #included by the pcre2test program, +PCRE2 code modules. The tables are also #included by the pcre2test program, which uses macros to change their names from _pcre2_xxx to xxxx, thereby avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is defined. */ @@ -148,7 +148,7 @@ two code points. The breaking rules are as follows: 1. Break at the start and end of text (pretty obviously). -2. Do not break between a CR and LF; otherwise, break before and after +2. Do not break between a CR and LF; otherwise, break before and after controls. 3. Do not break Hangul syllable sequences, the rules for which are: @@ -157,44 +157,62 @@ two code points. The breaking rules are as follows: LV or V may be followed by V or T LVT or T may be followed by T -4. Do not break before extending characters. +4. Do not break before extending characters or zero-width-joiner (ZWJ). -The next two rules are only for extended grapheme clusters (but that's what we +The following rules are only for extended grapheme clusters (but that's what we are implementing). 5. Do not break before SpacingMarks. 6. Do not break after Prepend characters. -7. Otherwise, break everywhere. +7. 
Do not break within emoji modifier sequences (E_Base or E_Base_GAZ followed + by E_Modifier). Extend characters are allowed before the modifier; this + cannot be represented in this table, the code has to deal with it. + +8. Do not break within emoji zwj sequences (ZWJ followed by Glue_After_Zwj or + E_Base_GAZ). + +9. Do not break within emoji flag sequences. That is, do not break between + regional indicator (RI) symbols if there are an odd number of RI characters + before the break point. This table encodes "join RI characters"; the code + has to deal with checking for previous adjoining RIs. + +10. Otherwise, break everywhere. */ +#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ) + const uint32_t PRIV(ucp_gbtable)[] = { (1<<ucp_gbLF), /* 0 CR */ 0, /* 1 LF */ 0, /* 2 Control */ - (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark), /* 3 Extend */ - (1<<ucp_gbExtend)|(1<<ucp_gbPrepend)| /* 4 Prepend */ - (1<<ucp_gbSpacingMark)|(1<<ucp_gbL)| - (1<<ucp_gbV)|(1<<ucp_gbT)|(1<<ucp_gbLV)| - (1<<ucp_gbLVT)|(1<<ucp_gbOther), - - (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark), /* 5 SpacingMark */ - (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbL)| /* 6 L */ - (1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)|(1<<ucp_gbLVT), - - (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)| /* 7 V */ - (1<<ucp_gbT), - - (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbT), /* 8 T */ - (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)| /* 9 LV */ - (1<<ucp_gbT), - - (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbT), /* 10 LVT */ + ESZ, /* 3 Extend */ + ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */ + (1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbT)| + (1<<ucp_gbLV)|(1<<ucp_gbLVT)|(1<<ucp_gbOther)| + (1<<ucp_gbRegionalIndicator)| + (1<<ucp_gbE_Base)|(1<<ucp_gbE_Modifier)| + (1<<ucp_gbE_Base_GAZ)| + (1<<ucp_gbZWJ)|(1<<ucp_gbGlue_After_Zwj), + ESZ, /* 5 SpacingMark */ + ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */ + (1<<ucp_gbLVT), + ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 7 V */ + 
ESZ|(1<<ucp_gbT), /* 8 T */ + ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 9 LV */ + ESZ|(1<<ucp_gbT), /* 10 LVT */ (1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */ - (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark) /* 12 Other */ + ESZ, /* 12 Other */ + ESZ|(1<<ucp_gbE_Modifier), /* 13 E_Base */ + ESZ, /* 14 E_Modifier */ + ESZ|(1<<ucp_gbE_Modifier), /* 15 E_Base_GAZ */ + ESZ|(1<<ucp_gbGlue_After_Zwj)|(1<<ucp_gbE_Base_GAZ), /* 16 ZWJ */ + ESZ /* 17 Glue_After_Zwj */ }; +#undef ESZ + #ifdef SUPPORT_JIT /* This table reverses PRIV(ucp_gentype). We can save the cost of a memory load. */ @@ -227,6 +245,7 @@ version. Like all other character and string literals that are compared against the regular expression pattern, we must use STR_ macros instead of literal strings to make sure that UTF-8 support works on EBCDIC platforms. */ +#define STRING_Adlam0 STR_A STR_d STR_l STR_a STR_m "\0" #define STRING_Ahom0 STR_A STR_h STR_o STR_m "\0" #define STRING_Anatolian_Hieroglyphs0 STR_A STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" #define STRING_Any0 STR_A STR_n STR_y "\0" @@ -238,6 +257,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0" #define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0" #define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0" +#define STRING_Bhaiksuki0 STR_B STR_h STR_a STR_i STR_k STR_s STR_u STR_k STR_i "\0" #define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0" #define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0" #define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0" @@ -313,6 +333,8 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. 
*/ #define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0" #define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0" #define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0" +#define STRING_Marchen0 STR_M STR_a STR_r STR_c STR_h STR_e STR_n "\0" +#define STRING_Masaram_Gondi0 STR_M STR_a STR_s STR_a STR_r STR_a STR_m STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0" #define STRING_Mc0 STR_M STR_c "\0" #define STRING_Me0 STR_M STR_e "\0" #define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0" @@ -330,9 +352,11 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0" #define STRING_Nd0 STR_N STR_d "\0" #define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0" +#define STRING_Newa0 STR_N STR_e STR_w STR_a "\0" #define STRING_Nko0 STR_N STR_k STR_o "\0" #define STRING_Nl0 STR_N STR_l "\0" #define STRING_No0 STR_N STR_o "\0" +#define STRING_Nushu0 STR_N STR_u STR_s STR_h STR_u "\0" #define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0" #define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0" #define STRING_Old_Hungarian0 STR_O STR_l STR_d STR_UNDERSCORE STR_H STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0" @@ -343,6 +367,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. 
*/ #define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0" #define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0" #define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0" +#define STRING_Osage0 STR_O STR_s STR_a STR_g STR_e "\0" #define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0" #define STRING_P0 STR_P "\0" #define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0" @@ -373,6 +398,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Sm0 STR_S STR_m "\0" #define STRING_So0 STR_S STR_o "\0" #define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0" +#define STRING_Soyombo0 STR_S STR_o STR_y STR_o STR_m STR_b STR_o "\0" #define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0" #define STRING_Syloti_Nagri0 STR_S STR_y STR_l STR_o STR_t STR_i STR_UNDERSCORE STR_N STR_a STR_g STR_r STR_i "\0" #define STRING_Syriac0 STR_S STR_y STR_r STR_i STR_a STR_c "\0" @@ -383,6 +409,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0" #define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0" #define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0" +#define STRING_Tangut0 STR_T STR_a STR_n STR_g STR_u STR_t "\0" #define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0" #define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0" #define STRING_Thai0 STR_T STR_h STR_a STR_i "\0" @@ -399,11 +426,13 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. 
*/ #define STRING_Xwd0 STR_X STR_w STR_d "\0" #define STRING_Yi0 STR_Y STR_i "\0" #define STRING_Z0 STR_Z "\0" +#define STRING_Zanabazar_Square0 STR_Z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_UNDERSCORE STR_S STR_q STR_u STR_a STR_r STR_e "\0" #define STRING_Zl0 STR_Z STR_l "\0" #define STRING_Zp0 STR_Z STR_p "\0" #define STRING_Zs0 STR_Z STR_s "\0" const char PRIV(utt_names)[] = + STRING_Adlam0 STRING_Ahom0 STRING_Anatolian_Hieroglyphs0 STRING_Any0 @@ -415,6 +444,7 @@ const char PRIV(utt_names)[] = STRING_Bassa_Vah0 STRING_Batak0 STRING_Bengali0 + STRING_Bhaiksuki0 STRING_Bopomofo0 STRING_Brahmi0 STRING_Braille0 @@ -490,6 +520,8 @@ const char PRIV(utt_names)[] = STRING_Malayalam0 STRING_Mandaic0 STRING_Manichaean0 + STRING_Marchen0 + STRING_Masaram_Gondi0 STRING_Mc0 STRING_Me0 STRING_Meetei_Mayek0 @@ -507,9 +539,11 @@ const char PRIV(utt_names)[] = STRING_Nabataean0 STRING_Nd0 STRING_New_Tai_Lue0 + STRING_Newa0 STRING_Nko0 STRING_Nl0 STRING_No0 + STRING_Nushu0 STRING_Ogham0 STRING_Ol_Chiki0 STRING_Old_Hungarian0 @@ -520,6 +554,7 @@ const char PRIV(utt_names)[] = STRING_Old_South_Arabian0 STRING_Old_Turkic0 STRING_Oriya0 + STRING_Osage0 STRING_Osmanya0 STRING_P0 STRING_Pahawh_Hmong0 @@ -550,6 +585,7 @@ const char PRIV(utt_names)[] = STRING_Sm0 STRING_So0 STRING_Sora_Sompeng0 + STRING_Soyombo0 STRING_Sundanese0 STRING_Syloti_Nagri0 STRING_Syriac0 @@ -560,6 +596,7 @@ const char PRIV(utt_names)[] = STRING_Tai_Viet0 STRING_Takri0 STRING_Tamil0 + STRING_Tangut0 STRING_Telugu0 STRING_Thaana0 STRING_Thai0 @@ -576,186 +613,197 @@ const char PRIV(utt_names)[] = STRING_Xwd0 STRING_Yi0 STRING_Z0 + STRING_Zanabazar_Square0 STRING_Zl0 STRING_Zp0 STRING_Zs0; const ucp_type_table PRIV(utt)[] = { - { 0, PT_SC, ucp_Ahom }, - { 5, PT_SC, ucp_Anatolian_Hieroglyphs }, - { 27, PT_ANY, 0 }, - { 31, PT_SC, ucp_Arabic }, - { 38, PT_SC, ucp_Armenian }, - { 47, PT_SC, ucp_Avestan }, - { 55, PT_SC, ucp_Balinese }, - { 64, PT_SC, ucp_Bamum }, - { 70, PT_SC, ucp_Bassa_Vah }, - { 80, 
PT_SC, ucp_Batak }, - { 86, PT_SC, ucp_Bengali }, - { 94, PT_SC, ucp_Bopomofo }, - { 103, PT_SC, ucp_Brahmi }, - { 110, PT_SC, ucp_Braille }, - { 118, PT_SC, ucp_Buginese }, - { 127, PT_SC, ucp_Buhid }, - { 133, PT_GC, ucp_C }, - { 135, PT_SC, ucp_Canadian_Aboriginal }, - { 155, PT_SC, ucp_Carian }, - { 162, PT_SC, ucp_Caucasian_Albanian }, - { 181, PT_PC, ucp_Cc }, - { 184, PT_PC, ucp_Cf }, - { 187, PT_SC, ucp_Chakma }, - { 194, PT_SC, ucp_Cham }, - { 199, PT_SC, ucp_Cherokee }, - { 208, PT_PC, ucp_Cn }, - { 211, PT_PC, ucp_Co }, - { 214, PT_SC, ucp_Common }, - { 221, PT_SC, ucp_Coptic }, - { 228, PT_PC, ucp_Cs }, - { 231, PT_SC, ucp_Cuneiform }, - { 241, PT_SC, ucp_Cypriot }, - { 249, PT_SC, ucp_Cyrillic }, - { 258, PT_SC, ucp_Deseret }, - { 266, PT_SC, ucp_Devanagari }, - { 277, PT_SC, ucp_Duployan }, - { 286, PT_SC, ucp_Egyptian_Hieroglyphs }, - { 307, PT_SC, ucp_Elbasan }, - { 315, PT_SC, ucp_Ethiopic }, - { 324, PT_SC, ucp_Georgian }, - { 333, PT_SC, ucp_Glagolitic }, - { 344, PT_SC, ucp_Gothic }, - { 351, PT_SC, ucp_Grantha }, - { 359, PT_SC, ucp_Greek }, - { 365, PT_SC, ucp_Gujarati }, - { 374, PT_SC, ucp_Gurmukhi }, - { 383, PT_SC, ucp_Han }, - { 387, PT_SC, ucp_Hangul }, - { 394, PT_SC, ucp_Hanunoo }, - { 402, PT_SC, ucp_Hatran }, - { 409, PT_SC, ucp_Hebrew }, - { 416, PT_SC, ucp_Hiragana }, - { 425, PT_SC, ucp_Imperial_Aramaic }, - { 442, PT_SC, ucp_Inherited }, - { 452, PT_SC, ucp_Inscriptional_Pahlavi }, - { 474, PT_SC, ucp_Inscriptional_Parthian }, - { 497, PT_SC, ucp_Javanese }, - { 506, PT_SC, ucp_Kaithi }, - { 513, PT_SC, ucp_Kannada }, - { 521, PT_SC, ucp_Katakana }, - { 530, PT_SC, ucp_Kayah_Li }, - { 539, PT_SC, ucp_Kharoshthi }, - { 550, PT_SC, ucp_Khmer }, - { 556, PT_SC, ucp_Khojki }, - { 563, PT_SC, ucp_Khudawadi }, - { 573, PT_GC, ucp_L }, - { 575, PT_LAMP, 0 }, - { 578, PT_SC, ucp_Lao }, - { 582, PT_SC, ucp_Latin }, - { 588, PT_SC, ucp_Lepcha }, - { 595, PT_SC, ucp_Limbu }, - { 601, PT_SC, ucp_Linear_A }, - { 610, PT_SC, ucp_Linear_B }, - 
{ 619, PT_SC, ucp_Lisu }, - { 624, PT_PC, ucp_Ll }, - { 627, PT_PC, ucp_Lm }, - { 630, PT_PC, ucp_Lo }, - { 633, PT_PC, ucp_Lt }, - { 636, PT_PC, ucp_Lu }, - { 639, PT_SC, ucp_Lycian }, - { 646, PT_SC, ucp_Lydian }, - { 653, PT_GC, ucp_M }, - { 655, PT_SC, ucp_Mahajani }, - { 664, PT_SC, ucp_Malayalam }, - { 674, PT_SC, ucp_Mandaic }, - { 682, PT_SC, ucp_Manichaean }, - { 693, PT_PC, ucp_Mc }, - { 696, PT_PC, ucp_Me }, - { 699, PT_SC, ucp_Meetei_Mayek }, - { 712, PT_SC, ucp_Mende_Kikakui }, - { 726, PT_SC, ucp_Meroitic_Cursive }, - { 743, PT_SC, ucp_Meroitic_Hieroglyphs }, - { 764, PT_SC, ucp_Miao }, - { 769, PT_PC, ucp_Mn }, - { 772, PT_SC, ucp_Modi }, - { 777, PT_SC, ucp_Mongolian }, - { 787, PT_SC, ucp_Mro }, - { 791, PT_SC, ucp_Multani }, - { 799, PT_SC, ucp_Myanmar }, - { 807, PT_GC, ucp_N }, - { 809, PT_SC, ucp_Nabataean }, - { 819, PT_PC, ucp_Nd }, - { 822, PT_SC, ucp_New_Tai_Lue }, - { 834, PT_SC, ucp_Nko }, - { 838, PT_PC, ucp_Nl }, - { 841, PT_PC, ucp_No }, - { 844, PT_SC, ucp_Ogham }, - { 850, PT_SC, ucp_Ol_Chiki }, - { 859, PT_SC, ucp_Old_Hungarian }, - { 873, PT_SC, ucp_Old_Italic }, - { 884, PT_SC, ucp_Old_North_Arabian }, - { 902, PT_SC, ucp_Old_Permic }, - { 913, PT_SC, ucp_Old_Persian }, - { 925, PT_SC, ucp_Old_South_Arabian }, - { 943, PT_SC, ucp_Old_Turkic }, - { 954, PT_SC, ucp_Oriya }, - { 960, PT_SC, ucp_Osmanya }, - { 968, PT_GC, ucp_P }, - { 970, PT_SC, ucp_Pahawh_Hmong }, - { 983, PT_SC, ucp_Palmyrene }, - { 993, PT_SC, ucp_Pau_Cin_Hau }, - { 1005, PT_PC, ucp_Pc }, - { 1008, PT_PC, ucp_Pd }, - { 1011, PT_PC, ucp_Pe }, - { 1014, PT_PC, ucp_Pf }, - { 1017, PT_SC, ucp_Phags_Pa }, - { 1026, PT_SC, ucp_Phoenician }, - { 1037, PT_PC, ucp_Pi }, - { 1040, PT_PC, ucp_Po }, - { 1043, PT_PC, ucp_Ps }, - { 1046, PT_SC, ucp_Psalter_Pahlavi }, - { 1062, PT_SC, ucp_Rejang }, - { 1069, PT_SC, ucp_Runic }, - { 1075, PT_GC, ucp_S }, - { 1077, PT_SC, ucp_Samaritan }, - { 1087, PT_SC, ucp_Saurashtra }, - { 1098, PT_PC, ucp_Sc }, - { 1101, PT_SC, ucp_Sharada }, 
- { 1109, PT_SC, ucp_Shavian }, - { 1117, PT_SC, ucp_Siddham }, - { 1125, PT_SC, ucp_SignWriting }, - { 1137, PT_SC, ucp_Sinhala }, - { 1145, PT_PC, ucp_Sk }, - { 1148, PT_PC, ucp_Sm }, - { 1151, PT_PC, ucp_So }, - { 1154, PT_SC, ucp_Sora_Sompeng }, - { 1167, PT_SC, ucp_Sundanese }, - { 1177, PT_SC, ucp_Syloti_Nagri }, - { 1190, PT_SC, ucp_Syriac }, - { 1197, PT_SC, ucp_Tagalog }, - { 1205, PT_SC, ucp_Tagbanwa }, - { 1214, PT_SC, ucp_Tai_Le }, - { 1221, PT_SC, ucp_Tai_Tham }, - { 1230, PT_SC, ucp_Tai_Viet }, - { 1239, PT_SC, ucp_Takri }, - { 1245, PT_SC, ucp_Tamil }, - { 1251, PT_SC, ucp_Telugu }, - { 1258, PT_SC, ucp_Thaana }, - { 1265, PT_SC, ucp_Thai }, - { 1270, PT_SC, ucp_Tibetan }, - { 1278, PT_SC, ucp_Tifinagh }, - { 1287, PT_SC, ucp_Tirhuta }, - { 1295, PT_SC, ucp_Ugaritic }, - { 1304, PT_SC, ucp_Vai }, - { 1308, PT_SC, ucp_Warang_Citi }, - { 1320, PT_ALNUM, 0 }, - { 1324, PT_PXSPACE, 0 }, - { 1328, PT_SPACE, 0 }, - { 1332, PT_UCNC, 0 }, - { 1336, PT_WORD, 0 }, - { 1340, PT_SC, ucp_Yi }, - { 1343, PT_GC, ucp_Z }, - { 1345, PT_PC, ucp_Zl }, - { 1348, PT_PC, ucp_Zp }, - { 1351, PT_PC, ucp_Zs } + { 0, PT_SC, ucp_Adlam }, + { 6, PT_SC, ucp_Ahom }, + { 11, PT_SC, ucp_Anatolian_Hieroglyphs }, + { 33, PT_ANY, 0 }, + { 37, PT_SC, ucp_Arabic }, + { 44, PT_SC, ucp_Armenian }, + { 53, PT_SC, ucp_Avestan }, + { 61, PT_SC, ucp_Balinese }, + { 70, PT_SC, ucp_Bamum }, + { 76, PT_SC, ucp_Bassa_Vah }, + { 86, PT_SC, ucp_Batak }, + { 92, PT_SC, ucp_Bengali }, + { 100, PT_SC, ucp_Bhaiksuki }, + { 110, PT_SC, ucp_Bopomofo }, + { 119, PT_SC, ucp_Brahmi }, + { 126, PT_SC, ucp_Braille }, + { 134, PT_SC, ucp_Buginese }, + { 143, PT_SC, ucp_Buhid }, + { 149, PT_GC, ucp_C }, + { 151, PT_SC, ucp_Canadian_Aboriginal }, + { 171, PT_SC, ucp_Carian }, + { 178, PT_SC, ucp_Caucasian_Albanian }, + { 197, PT_PC, ucp_Cc }, + { 200, PT_PC, ucp_Cf }, + { 203, PT_SC, ucp_Chakma }, + { 210, PT_SC, ucp_Cham }, + { 215, PT_SC, ucp_Cherokee }, + { 224, PT_PC, ucp_Cn }, + { 227, PT_PC, ucp_Co }, + { 
230, PT_SC, ucp_Common }, + { 237, PT_SC, ucp_Coptic }, + { 244, PT_PC, ucp_Cs }, + { 247, PT_SC, ucp_Cuneiform }, + { 257, PT_SC, ucp_Cypriot }, + { 265, PT_SC, ucp_Cyrillic }, + { 274, PT_SC, ucp_Deseret }, + { 282, PT_SC, ucp_Devanagari }, + { 293, PT_SC, ucp_Duployan }, + { 302, PT_SC, ucp_Egyptian_Hieroglyphs }, + { 323, PT_SC, ucp_Elbasan }, + { 331, PT_SC, ucp_Ethiopic }, + { 340, PT_SC, ucp_Georgian }, + { 349, PT_SC, ucp_Glagolitic }, + { 360, PT_SC, ucp_Gothic }, + { 367, PT_SC, ucp_Grantha }, + { 375, PT_SC, ucp_Greek }, + { 381, PT_SC, ucp_Gujarati }, + { 390, PT_SC, ucp_Gurmukhi }, + { 399, PT_SC, ucp_Han }, + { 403, PT_SC, ucp_Hangul }, + { 410, PT_SC, ucp_Hanunoo }, + { 418, PT_SC, ucp_Hatran }, + { 425, PT_SC, ucp_Hebrew }, + { 432, PT_SC, ucp_Hiragana }, + { 441, PT_SC, ucp_Imperial_Aramaic }, + { 458, PT_SC, ucp_Inherited }, + { 468, PT_SC, ucp_Inscriptional_Pahlavi }, + { 490, PT_SC, ucp_Inscriptional_Parthian }, + { 513, PT_SC, ucp_Javanese }, + { 522, PT_SC, ucp_Kaithi }, + { 529, PT_SC, ucp_Kannada }, + { 537, PT_SC, ucp_Katakana }, + { 546, PT_SC, ucp_Kayah_Li }, + { 555, PT_SC, ucp_Kharoshthi }, + { 566, PT_SC, ucp_Khmer }, + { 572, PT_SC, ucp_Khojki }, + { 579, PT_SC, ucp_Khudawadi }, + { 589, PT_GC, ucp_L }, + { 591, PT_LAMP, 0 }, + { 594, PT_SC, ucp_Lao }, + { 598, PT_SC, ucp_Latin }, + { 604, PT_SC, ucp_Lepcha }, + { 611, PT_SC, ucp_Limbu }, + { 617, PT_SC, ucp_Linear_A }, + { 626, PT_SC, ucp_Linear_B }, + { 635, PT_SC, ucp_Lisu }, + { 640, PT_PC, ucp_Ll }, + { 643, PT_PC, ucp_Lm }, + { 646, PT_PC, ucp_Lo }, + { 649, PT_PC, ucp_Lt }, + { 652, PT_PC, ucp_Lu }, + { 655, PT_SC, ucp_Lycian }, + { 662, PT_SC, ucp_Lydian }, + { 669, PT_GC, ucp_M }, + { 671, PT_SC, ucp_Mahajani }, + { 680, PT_SC, ucp_Malayalam }, + { 690, PT_SC, ucp_Mandaic }, + { 698, PT_SC, ucp_Manichaean }, + { 709, PT_SC, ucp_Marchen }, + { 717, PT_SC, ucp_Masaram_Gondi }, + { 731, PT_PC, ucp_Mc }, + { 734, PT_PC, ucp_Me }, + { 737, PT_SC, ucp_Meetei_Mayek }, + { 750, 
PT_SC, ucp_Mende_Kikakui }, + { 764, PT_SC, ucp_Meroitic_Cursive }, + { 781, PT_SC, ucp_Meroitic_Hieroglyphs }, + { 802, PT_SC, ucp_Miao }, + { 807, PT_PC, ucp_Mn }, + { 810, PT_SC, ucp_Modi }, + { 815, PT_SC, ucp_Mongolian }, + { 825, PT_SC, ucp_Mro }, + { 829, PT_SC, ucp_Multani }, + { 837, PT_SC, ucp_Myanmar }, + { 845, PT_GC, ucp_N }, + { 847, PT_SC, ucp_Nabataean }, + { 857, PT_PC, ucp_Nd }, + { 860, PT_SC, ucp_New_Tai_Lue }, + { 872, PT_SC, ucp_Newa }, + { 877, PT_SC, ucp_Nko }, + { 881, PT_PC, ucp_Nl }, + { 884, PT_PC, ucp_No }, + { 887, PT_SC, ucp_Nushu }, + { 893, PT_SC, ucp_Ogham }, + { 899, PT_SC, ucp_Ol_Chiki }, + { 908, PT_SC, ucp_Old_Hungarian }, + { 922, PT_SC, ucp_Old_Italic }, + { 933, PT_SC, ucp_Old_North_Arabian }, + { 951, PT_SC, ucp_Old_Permic }, + { 962, PT_SC, ucp_Old_Persian }, + { 974, PT_SC, ucp_Old_South_Arabian }, + { 992, PT_SC, ucp_Old_Turkic }, + { 1003, PT_SC, ucp_Oriya }, + { 1009, PT_SC, ucp_Osage }, + { 1015, PT_SC, ucp_Osmanya }, + { 1023, PT_GC, ucp_P }, + { 1025, PT_SC, ucp_Pahawh_Hmong }, + { 1038, PT_SC, ucp_Palmyrene }, + { 1048, PT_SC, ucp_Pau_Cin_Hau }, + { 1060, PT_PC, ucp_Pc }, + { 1063, PT_PC, ucp_Pd }, + { 1066, PT_PC, ucp_Pe }, + { 1069, PT_PC, ucp_Pf }, + { 1072, PT_SC, ucp_Phags_Pa }, + { 1081, PT_SC, ucp_Phoenician }, + { 1092, PT_PC, ucp_Pi }, + { 1095, PT_PC, ucp_Po }, + { 1098, PT_PC, ucp_Ps }, + { 1101, PT_SC, ucp_Psalter_Pahlavi }, + { 1117, PT_SC, ucp_Rejang }, + { 1124, PT_SC, ucp_Runic }, + { 1130, PT_GC, ucp_S }, + { 1132, PT_SC, ucp_Samaritan }, + { 1142, PT_SC, ucp_Saurashtra }, + { 1153, PT_PC, ucp_Sc }, + { 1156, PT_SC, ucp_Sharada }, + { 1164, PT_SC, ucp_Shavian }, + { 1172, PT_SC, ucp_Siddham }, + { 1180, PT_SC, ucp_SignWriting }, + { 1192, PT_SC, ucp_Sinhala }, + { 1200, PT_PC, ucp_Sk }, + { 1203, PT_PC, ucp_Sm }, + { 1206, PT_PC, ucp_So }, + { 1209, PT_SC, ucp_Sora_Sompeng }, + { 1222, PT_SC, ucp_Soyombo }, + { 1230, PT_SC, ucp_Sundanese }, + { 1240, PT_SC, ucp_Syloti_Nagri }, + { 1253, PT_SC, 
ucp_Syriac }, + { 1260, PT_SC, ucp_Tagalog }, + { 1268, PT_SC, ucp_Tagbanwa }, + { 1277, PT_SC, ucp_Tai_Le }, + { 1284, PT_SC, ucp_Tai_Tham }, + { 1293, PT_SC, ucp_Tai_Viet }, + { 1302, PT_SC, ucp_Takri }, + { 1308, PT_SC, ucp_Tamil }, + { 1314, PT_SC, ucp_Tangut }, + { 1321, PT_SC, ucp_Telugu }, + { 1328, PT_SC, ucp_Thaana }, + { 1335, PT_SC, ucp_Thai }, + { 1340, PT_SC, ucp_Tibetan }, + { 1348, PT_SC, ucp_Tifinagh }, + { 1357, PT_SC, ucp_Tirhuta }, + { 1365, PT_SC, ucp_Ugaritic }, + { 1374, PT_SC, ucp_Vai }, + { 1378, PT_SC, ucp_Warang_Citi }, + { 1390, PT_ALNUM, 0 }, + { 1394, PT_PXSPACE, 0 }, + { 1398, PT_SPACE, 0 }, + { 1402, PT_UCNC, 0 }, + { 1406, PT_WORD, 0 }, + { 1410, PT_SC, ucp_Yi }, + { 1413, PT_GC, ucp_Z }, + { 1415, PT_SC, ucp_Zanabazar_Square }, + { 1432, PT_PC, ucp_Zl }, + { 1435, PT_PC, ucp_Zp }, + { 1438, PT_PC, ucp_Zs } }; const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); |