diff options
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_tables.c')
-rw-r--r-- | thirdparty/pcre2/src/pcre2_tables.c | 347 |
1 files changed, 181 insertions, 166 deletions
diff --git a/thirdparty/pcre2/src/pcre2_tables.c b/thirdparty/pcre2/src/pcre2_tables.c index b10de45efb..c164e976e0 100644 --- a/thirdparty/pcre2/src/pcre2_tables.c +++ b/thirdparty/pcre2/src/pcre2_tables.c @@ -273,6 +273,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Cs0 STR_C STR_s "\0" #define STRING_Cuneiform0 STR_C STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0" #define STRING_Cypriot0 STR_C STR_y STR_p STR_r STR_i STR_o STR_t "\0" +#define STRING_Cypro_Minoan0 STR_C STR_y STR_p STR_r STR_o STR_UNDERSCORE STR_M STR_i STR_n STR_o STR_a STR_n "\0" #define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0" #define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0" #define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0" @@ -371,6 +372,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Old_Sogdian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0" #define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0" #define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0" +#define STRING_Old_Uyghur0 STR_O STR_l STR_d STR_UNDERSCORE STR_U STR_y STR_g STR_h STR_u STR_r "\0" #define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0" #define STRING_Osage0 STR_O STR_s STR_a STR_g STR_e "\0" #define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0" @@ -415,6 +417,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0" #define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0" #define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0" +#define STRING_Tangsa0 STR_T STR_a STR_n STR_g STR_s STR_a "\0" #define STRING_Tangut0 STR_T STR_a STR_n STR_g STR_u STR_t "\0" #define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0" #define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0" @@ -422,9 +425,11 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0" #define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0" #define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0" +#define STRING_Toto0 STR_T STR_o STR_t STR_o "\0" #define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0" #define STRING_Unknown0 STR_U STR_n STR_k STR_n STR_o STR_w STR_n "\0" #define STRING_Vai0 STR_V STR_a STR_i "\0" +#define STRING_Vithkuqi0 STR_V STR_i STR_t STR_h STR_k STR_u STR_q STR_i "\0" #define STRING_Wancho0 STR_W STR_a STR_n STR_c STR_h STR_o "\0" #define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0" #define STRING_Xan0 STR_X STR_a STR_n "\0" @@ -476,6 +481,7 @@ const char PRIV(utt_names)[] = STRING_Cs0 STRING_Cuneiform0 STRING_Cypriot0 + STRING_Cypro_Minoan0 STRING_Cyrillic0 STRING_Deseret0 STRING_Devanagari0 @@ -574,6 +580,7 @@ const char PRIV(utt_names)[] = STRING_Old_Sogdian0 STRING_Old_South_Arabian0 STRING_Old_Turkic0 + STRING_Old_Uyghur0 STRING_Oriya0 STRING_Osage0 STRING_Osmanya0 @@ -618,6 +625,7 @@ const char PRIV(utt_names)[] = STRING_Tai_Viet0 STRING_Takri0 STRING_Tamil0 + STRING_Tangsa0 STRING_Tangut0 STRING_Telugu0 STRING_Thaana0 @@ -625,9 +633,11 @@ const char PRIV(utt_names)[] = STRING_Tibetan0 STRING_Tifinagh0 STRING_Tirhuta0 + STRING_Toto0 STRING_Ugaritic0 STRING_Unknown0 STRING_Vai0 + STRING_Vithkuqi0 STRING_Wancho0 STRING_Warang_Citi0 STRING_Xan0 @@ -679,172 +689,177 @@ const ucp_type_table PRIV(utt)[] = { { 255, PT_PC, ucp_Cs }, { 258, PT_SC, ucp_Cuneiform }, { 268, PT_SC, ucp_Cypriot }, - { 276, PT_SC, ucp_Cyrillic }, - { 285, PT_SC, ucp_Deseret }, - { 293, PT_SC, ucp_Devanagari }, - { 304, PT_SC, ucp_Dives_Akuru }, - { 316, PT_SC, ucp_Dogra }, - { 322, PT_SC, ucp_Duployan }, - { 331, PT_SC, ucp_Egyptian_Hieroglyphs }, - { 352, PT_SC, ucp_Elbasan }, - { 360, PT_SC, ucp_Elymaic }, - { 368, PT_SC, ucp_Ethiopic }, - { 377, PT_SC, ucp_Georgian }, - { 386, PT_SC, ucp_Glagolitic }, - { 397, PT_SC, ucp_Gothic }, - { 404, PT_SC, ucp_Grantha }, - { 412, PT_SC, ucp_Greek }, - { 418, PT_SC, ucp_Gujarati }, - { 427, PT_SC, ucp_Gunjala_Gondi }, - { 441, PT_SC, ucp_Gurmukhi }, - { 450, PT_SC, ucp_Han }, - { 454, PT_SC, ucp_Hangul }, - { 461, PT_SC, ucp_Hanifi_Rohingya }, - { 477, PT_SC, ucp_Hanunoo }, - { 485, PT_SC, ucp_Hatran }, - { 492, PT_SC, ucp_Hebrew }, - { 499, PT_SC, ucp_Hiragana }, - { 508, PT_SC, ucp_Imperial_Aramaic }, - { 525, PT_SC, ucp_Inherited }, - { 535, PT_SC, ucp_Inscriptional_Pahlavi }, - { 557, PT_SC, ucp_Inscriptional_Parthian }, - { 580, PT_SC, ucp_Javanese }, - { 589, PT_SC, ucp_Kaithi }, - { 596, PT_SC, ucp_Kannada }, - { 604, PT_SC, ucp_Katakana }, - { 613, PT_SC, ucp_Kayah_Li }, - { 622, PT_SC, ucp_Kharoshthi }, - { 633, PT_SC, ucp_Khitan_Small_Script }, - { 653, PT_SC, ucp_Khmer }, - { 659, PT_SC, ucp_Khojki }, - { 666, PT_SC, ucp_Khudawadi }, - { 676, PT_GC, ucp_L }, - { 678, PT_LAMP, 0 }, - { 681, PT_SC, ucp_Lao }, - { 685, PT_SC, ucp_Latin }, - { 691, PT_SC, ucp_Lepcha }, - { 698, PT_SC, ucp_Limbu }, - { 704, PT_SC, ucp_Linear_A }, - { 713, PT_SC, ucp_Linear_B }, - { 722, PT_SC, ucp_Lisu }, - { 727, PT_PC, ucp_Ll }, - { 730, PT_PC, ucp_Lm }, - { 733, PT_PC, ucp_Lo }, - { 736, PT_PC, ucp_Lt }, - { 739, PT_PC, ucp_Lu }, - { 742, PT_SC, ucp_Lycian }, - { 749, PT_SC, ucp_Lydian }, - { 756, PT_GC, ucp_M }, - { 758, PT_SC, ucp_Mahajani }, - { 767, PT_SC, ucp_Makasar }, - { 775, PT_SC, ucp_Malayalam }, - { 785, PT_SC, ucp_Mandaic }, - { 793, PT_SC, ucp_Manichaean }, - { 804, PT_SC, ucp_Marchen }, - { 812, PT_SC, ucp_Masaram_Gondi }, - { 826, PT_PC, ucp_Mc }, - { 829, PT_PC, ucp_Me }, - { 832, PT_SC, ucp_Medefaidrin }, - { 844, PT_SC, ucp_Meetei_Mayek }, - { 857, PT_SC, ucp_Mende_Kikakui }, - { 871, PT_SC, ucp_Meroitic_Cursive }, - { 888, PT_SC, ucp_Meroitic_Hieroglyphs }, - { 909, PT_SC, ucp_Miao }, - { 914, PT_PC, ucp_Mn }, - { 917, PT_SC, ucp_Modi }, - { 922, PT_SC, ucp_Mongolian }, - { 932, PT_SC, ucp_Mro }, - { 936, PT_SC, ucp_Multani }, - { 944, PT_SC, ucp_Myanmar }, - { 952, PT_GC, ucp_N }, - { 954, PT_SC, ucp_Nabataean }, - { 964, PT_SC, ucp_Nandinagari }, - { 976, PT_PC, ucp_Nd }, - { 979, PT_SC, ucp_New_Tai_Lue }, - { 991, PT_SC, ucp_Newa }, - { 996, PT_SC, ucp_Nko }, - { 1000, PT_PC, ucp_Nl }, - { 1003, PT_PC, ucp_No }, - { 1006, PT_SC, ucp_Nushu }, - { 1012, PT_SC, ucp_Nyiakeng_Puachue_Hmong }, - { 1035, PT_SC, ucp_Ogham }, - { 1041, PT_SC, ucp_Ol_Chiki }, - { 1050, PT_SC, ucp_Old_Hungarian }, - { 1064, PT_SC, ucp_Old_Italic }, - { 1075, PT_SC, ucp_Old_North_Arabian }, - { 1093, PT_SC, ucp_Old_Permic }, - { 1104, PT_SC, ucp_Old_Persian }, - { 1116, PT_SC, ucp_Old_Sogdian }, - { 1128, PT_SC, ucp_Old_South_Arabian }, - { 1146, PT_SC, ucp_Old_Turkic }, - { 1157, PT_SC, ucp_Oriya }, - { 1163, PT_SC, ucp_Osage }, - { 1169, PT_SC, ucp_Osmanya }, - { 1177, PT_GC, ucp_P }, - { 1179, PT_SC, ucp_Pahawh_Hmong }, - { 1192, PT_SC, ucp_Palmyrene }, - { 1202, PT_SC, ucp_Pau_Cin_Hau }, - { 1214, PT_PC, ucp_Pc }, - { 1217, PT_PC, ucp_Pd }, - { 1220, PT_PC, ucp_Pe }, - { 1223, PT_PC, ucp_Pf }, - { 1226, PT_SC, ucp_Phags_Pa }, - { 1235, PT_SC, ucp_Phoenician }, - { 1246, PT_PC, ucp_Pi }, - { 1249, PT_PC, ucp_Po }, - { 1252, PT_PC, ucp_Ps }, - { 1255, PT_SC, ucp_Psalter_Pahlavi }, - { 1271, PT_SC, ucp_Rejang }, - { 1278, PT_SC, ucp_Runic }, - { 1284, PT_GC, ucp_S }, - { 1286, PT_SC, ucp_Samaritan }, - { 1296, PT_SC, ucp_Saurashtra }, - { 1307, PT_PC, ucp_Sc }, - { 1310, PT_SC, ucp_Sharada }, - { 1318, PT_SC, ucp_Shavian }, - { 1326, PT_SC, ucp_Siddham }, - { 1334, PT_SC, ucp_SignWriting }, - { 1346, PT_SC, ucp_Sinhala }, - { 1354, PT_PC, ucp_Sk }, - { 1357, PT_PC, ucp_Sm }, - { 1360, PT_PC, ucp_So }, - { 1363, PT_SC, ucp_Sogdian }, - { 1371, PT_SC, ucp_Sora_Sompeng }, - { 1384, PT_SC, ucp_Soyombo }, - { 1392, PT_SC, ucp_Sundanese }, - { 1402, PT_SC, ucp_Syloti_Nagri }, - { 1415, PT_SC, ucp_Syriac }, - { 1422, PT_SC, ucp_Tagalog }, - { 1430, PT_SC, ucp_Tagbanwa }, - { 1439, PT_SC, ucp_Tai_Le }, - { 1446, PT_SC, ucp_Tai_Tham }, - { 1455, PT_SC, ucp_Tai_Viet }, - { 1464, PT_SC, ucp_Takri }, - { 1470, PT_SC, ucp_Tamil }, - { 1476, PT_SC, ucp_Tangut }, - { 1483, PT_SC, ucp_Telugu }, - { 1490, PT_SC, ucp_Thaana }, - { 1497, PT_SC, ucp_Thai }, - { 1502, PT_SC, ucp_Tibetan }, - { 1510, PT_SC, ucp_Tifinagh }, - { 1519, PT_SC, ucp_Tirhuta }, - { 1527, PT_SC, ucp_Ugaritic }, - { 1536, PT_SC, ucp_Unknown }, - { 1544, PT_SC, ucp_Vai }, - { 1548, PT_SC, ucp_Wancho }, - { 1555, PT_SC, ucp_Warang_Citi }, - { 1567, PT_ALNUM, 0 }, - { 1571, PT_PXSPACE, 0 }, - { 1575, PT_SPACE, 0 }, - { 1579, PT_UCNC, 0 }, - { 1583, PT_WORD, 0 }, - { 1587, PT_SC, ucp_Yezidi }, - { 1594, PT_SC, ucp_Yi }, - { 1597, PT_GC, ucp_Z }, - { 1599, PT_SC, ucp_Zanabazar_Square }, - { 1616, PT_PC, ucp_Zl }, - { 1619, PT_PC, ucp_Zp }, - { 1622, PT_PC, ucp_Zs } + { 276, PT_SC, ucp_Cypro_Minoan }, + { 289, PT_SC, ucp_Cyrillic }, + { 298, PT_SC, ucp_Deseret }, + { 306, PT_SC, ucp_Devanagari }, + { 317, PT_SC, ucp_Dives_Akuru }, + { 329, PT_SC, ucp_Dogra }, + { 335, PT_SC, ucp_Duployan }, + { 344, PT_SC, ucp_Egyptian_Hieroglyphs }, + { 365, PT_SC, ucp_Elbasan }, + { 373, PT_SC, ucp_Elymaic }, + { 381, PT_SC, ucp_Ethiopic }, + { 390, PT_SC, ucp_Georgian }, + { 399, PT_SC, ucp_Glagolitic }, + { 410, PT_SC, ucp_Gothic }, + { 417, PT_SC, ucp_Grantha }, + { 425, PT_SC, ucp_Greek }, + { 431, PT_SC, ucp_Gujarati }, + { 440, PT_SC, ucp_Gunjala_Gondi }, + { 454, PT_SC, ucp_Gurmukhi }, + { 463, PT_SC, ucp_Han }, + { 467, PT_SC, ucp_Hangul }, + { 474, PT_SC, ucp_Hanifi_Rohingya }, + { 490, PT_SC, ucp_Hanunoo }, + { 498, PT_SC, ucp_Hatran }, + { 505, PT_SC, ucp_Hebrew }, + { 512, PT_SC, ucp_Hiragana }, + { 521, PT_SC, ucp_Imperial_Aramaic }, + { 538, PT_SC, ucp_Inherited }, + { 548, PT_SC, ucp_Inscriptional_Pahlavi }, + { 570, PT_SC, ucp_Inscriptional_Parthian }, + { 593, PT_SC, ucp_Javanese }, + { 602, PT_SC, ucp_Kaithi }, + { 609, PT_SC, ucp_Kannada }, + { 617, PT_SC, ucp_Katakana }, + { 626, PT_SC, ucp_Kayah_Li }, + { 635, PT_SC, ucp_Kharoshthi }, + { 646, PT_SC, ucp_Khitan_Small_Script }, + { 666, PT_SC, ucp_Khmer }, + { 672, PT_SC, ucp_Khojki }, + { 679, PT_SC, ucp_Khudawadi }, + { 689, PT_GC, ucp_L }, + { 691, PT_LAMP, 0 }, + { 694, PT_SC, ucp_Lao }, + { 698, PT_SC, ucp_Latin }, + { 704, PT_SC, ucp_Lepcha }, + { 711, PT_SC, ucp_Limbu }, + { 717, PT_SC, ucp_Linear_A }, + { 726, PT_SC, ucp_Linear_B }, + { 735, PT_SC, ucp_Lisu }, + { 740, PT_PC, ucp_Ll }, + { 743, PT_PC, ucp_Lm }, + { 746, PT_PC, ucp_Lo }, + { 749, PT_PC, ucp_Lt }, + { 752, PT_PC, ucp_Lu }, + { 755, PT_SC, ucp_Lycian }, + { 762, PT_SC, ucp_Lydian }, + { 769, PT_GC, ucp_M }, + { 771, PT_SC, ucp_Mahajani }, + { 780, PT_SC, ucp_Makasar }, + { 788, PT_SC, ucp_Malayalam }, + { 798, PT_SC, ucp_Mandaic }, + { 806, PT_SC, ucp_Manichaean }, + { 817, PT_SC, ucp_Marchen }, + { 825, PT_SC, ucp_Masaram_Gondi }, + { 839, PT_PC, ucp_Mc }, + { 842, PT_PC, ucp_Me }, + { 845, PT_SC, ucp_Medefaidrin }, + { 857, PT_SC, ucp_Meetei_Mayek }, + { 870, PT_SC, ucp_Mende_Kikakui }, + { 884, PT_SC, ucp_Meroitic_Cursive }, + { 901, PT_SC, ucp_Meroitic_Hieroglyphs }, + { 922, PT_SC, ucp_Miao }, + { 927, PT_PC, ucp_Mn }, + { 930, PT_SC, ucp_Modi }, + { 935, PT_SC, ucp_Mongolian }, + { 945, PT_SC, ucp_Mro }, + { 949, PT_SC, ucp_Multani }, + { 957, PT_SC, ucp_Myanmar }, + { 965, PT_GC, ucp_N }, + { 967, PT_SC, ucp_Nabataean }, + { 977, PT_SC, ucp_Nandinagari }, + { 989, PT_PC, ucp_Nd }, + { 992, PT_SC, ucp_New_Tai_Lue }, + { 1004, PT_SC, ucp_Newa }, + { 1009, PT_SC, ucp_Nko }, + { 1013, PT_PC, ucp_Nl }, + { 1016, PT_PC, ucp_No }, + { 1019, PT_SC, ucp_Nushu }, + { 1025, PT_SC, ucp_Nyiakeng_Puachue_Hmong }, + { 1048, PT_SC, ucp_Ogham }, + { 1054, PT_SC, ucp_Ol_Chiki }, + { 1063, PT_SC, ucp_Old_Hungarian }, + { 1077, PT_SC, ucp_Old_Italic }, + { 1088, PT_SC, ucp_Old_North_Arabian }, + { 1106, PT_SC, ucp_Old_Permic }, + { 1117, PT_SC, ucp_Old_Persian }, + { 1129, PT_SC, ucp_Old_Sogdian }, + { 1141, PT_SC, ucp_Old_South_Arabian }, + { 1159, PT_SC, ucp_Old_Turkic }, + { 1170, PT_SC, ucp_Old_Uyghur }, + { 1181, PT_SC, ucp_Oriya }, + { 1187, PT_SC, ucp_Osage }, + { 1193, PT_SC, ucp_Osmanya }, + { 1201, PT_GC, ucp_P }, + { 1203, PT_SC, ucp_Pahawh_Hmong }, + { 1216, PT_SC, ucp_Palmyrene }, + { 1226, PT_SC, ucp_Pau_Cin_Hau }, + { 1238, PT_PC, ucp_Pc }, + { 1241, PT_PC, ucp_Pd }, + { 1244, PT_PC, ucp_Pe }, + { 1247, PT_PC, ucp_Pf }, + { 1250, PT_SC, ucp_Phags_Pa }, + { 1259, PT_SC, ucp_Phoenician }, + { 1270, PT_PC, ucp_Pi }, + { 1273, PT_PC, ucp_Po }, + { 1276, PT_PC, ucp_Ps }, + { 1279, PT_SC, ucp_Psalter_Pahlavi }, + { 1295, PT_SC, ucp_Rejang }, + { 1302, PT_SC, ucp_Runic }, + { 1308, PT_GC, ucp_S }, + { 1310, PT_SC, ucp_Samaritan }, + { 1320, PT_SC, ucp_Saurashtra }, + { 1331, PT_PC, ucp_Sc }, + { 1334, PT_SC, ucp_Sharada }, + { 1342, PT_SC, ucp_Shavian }, + { 1350, PT_SC, ucp_Siddham }, + { 1358, PT_SC, ucp_SignWriting }, + { 1370, PT_SC, ucp_Sinhala }, + { 1378, PT_PC, ucp_Sk }, + { 1381, PT_PC, ucp_Sm }, + { 1384, PT_PC, ucp_So }, + { 1387, PT_SC, ucp_Sogdian }, + { 1395, PT_SC, ucp_Sora_Sompeng }, + { 1408, PT_SC, ucp_Soyombo }, + { 1416, PT_SC, ucp_Sundanese }, + { 1426, PT_SC, ucp_Syloti_Nagri }, + { 1439, PT_SC, ucp_Syriac }, + { 1446, PT_SC, ucp_Tagalog }, + { 1454, PT_SC, ucp_Tagbanwa }, + { 1463, PT_SC, ucp_Tai_Le }, + { 1470, PT_SC, ucp_Tai_Tham }, + { 1479, PT_SC, ucp_Tai_Viet }, + { 1488, PT_SC, ucp_Takri }, + { 1494, PT_SC, ucp_Tamil }, + { 1500, PT_SC, ucp_Tangsa }, + { 1507, PT_SC, ucp_Tangut }, + { 1514, PT_SC, ucp_Telugu }, + { 1521, PT_SC, ucp_Thaana }, + { 1528, PT_SC, ucp_Thai }, + { 1533, PT_SC, ucp_Tibetan }, + { 1541, PT_SC, ucp_Tifinagh }, + { 1550, PT_SC, ucp_Tirhuta }, + { 1558, PT_SC, ucp_Toto }, + { 1563, PT_SC, ucp_Ugaritic }, + { 1572, PT_SC, ucp_Unknown }, + { 1580, PT_SC, ucp_Vai }, + { 1584, PT_SC, ucp_Vithkuqi }, + { 1593, PT_SC, ucp_Wancho }, + { 1600, PT_SC, ucp_Warang_Citi }, + { 1612, PT_ALNUM, 0 }, + { 1616, PT_PXSPACE, 0 }, + { 1620, PT_SPACE, 0 }, + { 1624, PT_UCNC, 0 }, + { 1628, PT_WORD, 0 }, + { 1632, PT_SC, ucp_Yezidi }, + { 1639, PT_SC, ucp_Yi }, + { 1642, PT_GC, ucp_Z }, + { 1644, PT_SC, ucp_Zanabazar_Square }, + { 1661, PT_PC, ucp_Zl }, + { 1664, PT_PC, ucp_Zp }, + { 1667, PT_PC, ucp_Zs } }; const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); |