diff options
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_tables.c')
-rw-r--r-- | thirdparty/pcre2/src/pcre2_tables.c | 352 |
1 files changed, 182 insertions, 170 deletions
diff --git a/thirdparty/pcre2/src/pcre2_tables.c b/thirdparty/pcre2/src/pcre2_tables.c index 25531d98c6..b10de45efb 100644 --- a/thirdparty/pcre2/src/pcre2_tables.c +++ b/thirdparty/pcre2/src/pcre2_tables.c @@ -265,6 +265,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0" #define STRING_Cham0 STR_C STR_h STR_a STR_m "\0" #define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0" +#define STRING_Chorasmian0 STR_C STR_h STR_o STR_r STR_a STR_s STR_m STR_i STR_a STR_n "\0" #define STRING_Cn0 STR_C STR_n "\0" #define STRING_Co0 STR_C STR_o "\0" #define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0" @@ -275,6 +276,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0" #define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0" #define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0" +#define STRING_Dives_Akuru0 STR_D STR_i STR_v STR_e STR_s STR_UNDERSCORE STR_A STR_k STR_u STR_r STR_u "\0" #define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0" #define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0" #define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" @@ -306,6 +308,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0" #define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0" #define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0" +#define STRING_Khitan_Small_Script0 STR_K STR_h STR_i STR_t STR_a STR_n STR_UNDERSCORE STR_S STR_m STR_a STR_l STR_l STR_UNDERSCORE STR_S STR_c STR_r STR_i STR_p STR_t "\0" #define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0" #define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0" #define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0" @@ -429,6 +432,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Xsp0 STR_X STR_s STR_p "\0" #define STRING_Xuc0 STR_X STR_u STR_c "\0" #define STRING_Xwd0 STR_X STR_w STR_d "\0" +#define STRING_Yezidi0 STR_Y STR_e STR_z STR_i STR_d STR_i "\0" #define STRING_Yi0 STR_Y STR_i "\0" #define STRING_Z0 STR_Z "\0" #define STRING_Zanabazar_Square0 STR_Z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_UNDERSCORE STR_S STR_q STR_u STR_a STR_r STR_e "\0" @@ -464,6 +468,7 @@ const char PRIV(utt_names)[] = STRING_Chakma0 STRING_Cham0 STRING_Cherokee0 + STRING_Chorasmian0 STRING_Cn0 STRING_Co0 STRING_Common0 @@ -474,6 +479,7 @@ const char PRIV(utt_names)[] = STRING_Cyrillic0 STRING_Deseret0 STRING_Devanagari0 + STRING_Dives_Akuru0 STRING_Dogra0 STRING_Duployan0 STRING_Egyptian_Hieroglyphs0 @@ -505,6 +511,7 @@ const char PRIV(utt_names)[] = STRING_Katakana0 STRING_Kayah_Li0 STRING_Kharoshthi0 + STRING_Khitan_Small_Script0 STRING_Khmer0 STRING_Khojki0 STRING_Khudawadi0 @@ -628,6 +635,7 @@ const char PRIV(utt_names)[] = STRING_Xsp0 STRING_Xuc0 STRING_Xwd0 + STRING_Yezidi0 STRING_Yi0 STRING_Z0 STRING_Zanabazar_Square0 @@ -663,176 +671,180 @@ const ucp_type_table PRIV(utt)[] = { { 203, PT_SC, ucp_Chakma }, { 210, PT_SC, ucp_Cham }, { 215, PT_SC, ucp_Cherokee }, - { 224, PT_PC, ucp_Cn }, - { 227, PT_PC, ucp_Co }, - { 230, PT_SC, ucp_Common }, - { 237, PT_SC, ucp_Coptic }, - { 244, PT_PC, ucp_Cs }, - { 247, PT_SC, ucp_Cuneiform }, - { 257, PT_SC, ucp_Cypriot }, - { 265, PT_SC, ucp_Cyrillic }, - { 274, PT_SC, ucp_Deseret }, - { 282, PT_SC, ucp_Devanagari }, - { 293, PT_SC, ucp_Dogra }, - { 299, PT_SC, ucp_Duployan }, - { 308, PT_SC, ucp_Egyptian_Hieroglyphs }, - { 329, PT_SC, ucp_Elbasan }, - { 337, PT_SC, ucp_Elymaic }, - { 345, PT_SC, ucp_Ethiopic }, - { 354, PT_SC, ucp_Georgian }, - { 363, PT_SC, ucp_Glagolitic }, - { 374, PT_SC, ucp_Gothic }, - { 381, PT_SC, ucp_Grantha }, - { 389, PT_SC, ucp_Greek }, - { 395, PT_SC, ucp_Gujarati }, - { 404, PT_SC, ucp_Gunjala_Gondi }, - { 418, PT_SC, ucp_Gurmukhi }, - { 427, PT_SC, ucp_Han }, - { 431, PT_SC, ucp_Hangul }, - { 438, PT_SC, ucp_Hanifi_Rohingya }, - { 454, PT_SC, ucp_Hanunoo }, - { 462, PT_SC, ucp_Hatran }, - { 469, PT_SC, ucp_Hebrew }, - { 476, PT_SC, ucp_Hiragana }, - { 485, PT_SC, ucp_Imperial_Aramaic }, - { 502, PT_SC, ucp_Inherited }, - { 512, PT_SC, ucp_Inscriptional_Pahlavi }, - { 534, PT_SC, ucp_Inscriptional_Parthian }, - { 557, PT_SC, ucp_Javanese }, - { 566, PT_SC, ucp_Kaithi }, - { 573, PT_SC, ucp_Kannada }, - { 581, PT_SC, ucp_Katakana }, - { 590, PT_SC, ucp_Kayah_Li }, - { 599, PT_SC, ucp_Kharoshthi }, - { 610, PT_SC, ucp_Khmer }, - { 616, PT_SC, ucp_Khojki }, - { 623, PT_SC, ucp_Khudawadi }, - { 633, PT_GC, ucp_L }, - { 635, PT_LAMP, 0 }, - { 638, PT_SC, ucp_Lao }, - { 642, PT_SC, ucp_Latin }, - { 648, PT_SC, ucp_Lepcha }, - { 655, PT_SC, ucp_Limbu }, - { 661, PT_SC, ucp_Linear_A }, - { 670, PT_SC, ucp_Linear_B }, - { 679, PT_SC, ucp_Lisu }, - { 684, PT_PC, ucp_Ll }, - { 687, PT_PC, ucp_Lm }, - { 690, PT_PC, ucp_Lo }, - { 693, PT_PC, ucp_Lt }, - { 696, PT_PC, ucp_Lu }, - { 699, PT_SC, ucp_Lycian }, - { 706, PT_SC, ucp_Lydian }, - { 713, PT_GC, ucp_M }, - { 715, PT_SC, ucp_Mahajani }, - { 724, PT_SC, ucp_Makasar }, - { 732, PT_SC, ucp_Malayalam }, - { 742, PT_SC, ucp_Mandaic }, - { 750, PT_SC, ucp_Manichaean }, - { 761, PT_SC, ucp_Marchen }, - { 769, PT_SC, ucp_Masaram_Gondi }, - { 783, PT_PC, ucp_Mc }, - { 786, PT_PC, ucp_Me }, - { 789, PT_SC, ucp_Medefaidrin }, - { 801, PT_SC, ucp_Meetei_Mayek }, - { 814, PT_SC, ucp_Mende_Kikakui }, - { 828, PT_SC, ucp_Meroitic_Cursive }, - { 845, PT_SC, ucp_Meroitic_Hieroglyphs }, - { 866, PT_SC, ucp_Miao }, - { 871, PT_PC, ucp_Mn }, - { 874, PT_SC, ucp_Modi }, - { 879, PT_SC, ucp_Mongolian }, - { 889, PT_SC, ucp_Mro }, - { 893, PT_SC, ucp_Multani }, - { 901, PT_SC, ucp_Myanmar }, - { 909, PT_GC, ucp_N }, - { 911, PT_SC, ucp_Nabataean }, - { 921, PT_SC, ucp_Nandinagari }, - { 933, PT_PC, ucp_Nd }, - { 936, PT_SC, ucp_New_Tai_Lue }, - { 948, PT_SC, ucp_Newa }, - { 953, PT_SC, ucp_Nko }, - { 957, PT_PC, ucp_Nl }, - { 960, PT_PC, ucp_No }, - { 963, PT_SC, ucp_Nushu }, - { 969, PT_SC, ucp_Nyiakeng_Puachue_Hmong }, - { 992, PT_SC, ucp_Ogham }, - { 998, PT_SC, ucp_Ol_Chiki }, - { 1007, PT_SC, ucp_Old_Hungarian }, - { 1021, PT_SC, ucp_Old_Italic }, - { 1032, PT_SC, ucp_Old_North_Arabian }, - { 1050, PT_SC, ucp_Old_Permic }, - { 1061, PT_SC, ucp_Old_Persian }, - { 1073, PT_SC, ucp_Old_Sogdian }, - { 1085, PT_SC, ucp_Old_South_Arabian }, - { 1103, PT_SC, ucp_Old_Turkic }, - { 1114, PT_SC, ucp_Oriya }, - { 1120, PT_SC, ucp_Osage }, - { 1126, PT_SC, ucp_Osmanya }, - { 1134, PT_GC, ucp_P }, - { 1136, PT_SC, ucp_Pahawh_Hmong }, - { 1149, PT_SC, ucp_Palmyrene }, - { 1159, PT_SC, ucp_Pau_Cin_Hau }, - { 1171, PT_PC, ucp_Pc }, - { 1174, PT_PC, ucp_Pd }, - { 1177, PT_PC, ucp_Pe }, - { 1180, PT_PC, ucp_Pf }, - { 1183, PT_SC, ucp_Phags_Pa }, - { 1192, PT_SC, ucp_Phoenician }, - { 1203, PT_PC, ucp_Pi }, - { 1206, PT_PC, ucp_Po }, - { 1209, PT_PC, ucp_Ps }, - { 1212, PT_SC, ucp_Psalter_Pahlavi }, - { 1228, PT_SC, ucp_Rejang }, - { 1235, PT_SC, ucp_Runic }, - { 1241, PT_GC, ucp_S }, - { 1243, PT_SC, ucp_Samaritan }, - { 1253, PT_SC, ucp_Saurashtra }, - { 1264, PT_PC, ucp_Sc }, - { 1267, PT_SC, ucp_Sharada }, - { 1275, PT_SC, ucp_Shavian }, - { 1283, PT_SC, ucp_Siddham }, - { 1291, PT_SC, ucp_SignWriting }, - { 1303, PT_SC, ucp_Sinhala }, - { 1311, PT_PC, ucp_Sk }, - { 1314, PT_PC, ucp_Sm }, - { 1317, PT_PC, ucp_So }, - { 1320, PT_SC, ucp_Sogdian }, - { 1328, PT_SC, ucp_Sora_Sompeng }, - { 1341, PT_SC, ucp_Soyombo }, - { 1349, PT_SC, ucp_Sundanese }, - { 1359, PT_SC, ucp_Syloti_Nagri }, - { 1372, PT_SC, ucp_Syriac }, - { 1379, PT_SC, ucp_Tagalog }, - { 1387, PT_SC, ucp_Tagbanwa }, - { 1396, PT_SC, ucp_Tai_Le }, - { 1403, PT_SC, ucp_Tai_Tham }, - { 1412, PT_SC, ucp_Tai_Viet }, - { 1421, PT_SC, ucp_Takri }, - { 1427, PT_SC, ucp_Tamil }, - { 1433, PT_SC, ucp_Tangut }, - { 1440, PT_SC, ucp_Telugu }, - { 1447, PT_SC, ucp_Thaana }, - { 1454, PT_SC, ucp_Thai }, - { 1459, PT_SC, ucp_Tibetan }, - { 1467, PT_SC, ucp_Tifinagh }, - { 1476, PT_SC, ucp_Tirhuta }, - { 1484, PT_SC, ucp_Ugaritic }, - { 1493, PT_SC, ucp_Unknown }, - { 1501, PT_SC, ucp_Vai }, - { 1505, PT_SC, ucp_Wancho }, - { 1512, PT_SC, ucp_Warang_Citi }, - { 1524, PT_ALNUM, 0 }, - { 1528, PT_PXSPACE, 0 }, - { 1532, PT_SPACE, 0 }, - { 1536, PT_UCNC, 0 }, - { 1540, PT_WORD, 0 }, - { 1544, PT_SC, ucp_Yi }, - { 1547, PT_GC, ucp_Z }, - { 1549, PT_SC, ucp_Zanabazar_Square }, - { 1566, PT_PC, ucp_Zl }, - { 1569, PT_PC, ucp_Zp }, - { 1572, PT_PC, ucp_Zs } + { 224, PT_SC, ucp_Chorasmian }, + { 235, PT_PC, ucp_Cn }, + { 238, PT_PC, ucp_Co }, + { 241, PT_SC, ucp_Common }, + { 248, PT_SC, ucp_Coptic }, + { 255, PT_PC, ucp_Cs }, + { 258, PT_SC, ucp_Cuneiform }, + { 268, PT_SC, ucp_Cypriot }, + { 276, PT_SC, ucp_Cyrillic }, + { 285, PT_SC, ucp_Deseret }, + { 293, PT_SC, ucp_Devanagari }, + { 304, PT_SC, ucp_Dives_Akuru }, + { 316, PT_SC, ucp_Dogra }, + { 322, PT_SC, ucp_Duployan }, + { 331, PT_SC, ucp_Egyptian_Hieroglyphs }, + { 352, PT_SC, ucp_Elbasan }, + { 360, PT_SC, ucp_Elymaic }, + { 368, PT_SC, ucp_Ethiopic }, + { 377, PT_SC, ucp_Georgian }, + { 386, PT_SC, ucp_Glagolitic }, + { 397, PT_SC, ucp_Gothic }, + { 404, PT_SC, ucp_Grantha }, + { 412, PT_SC, ucp_Greek }, + { 418, PT_SC, ucp_Gujarati }, + { 427, PT_SC, ucp_Gunjala_Gondi }, + { 441, PT_SC, ucp_Gurmukhi }, + { 450, PT_SC, ucp_Han }, + { 454, PT_SC, ucp_Hangul }, + { 461, PT_SC, ucp_Hanifi_Rohingya }, + { 477, PT_SC, ucp_Hanunoo }, + { 485, PT_SC, ucp_Hatran }, + { 492, PT_SC, ucp_Hebrew }, + { 499, PT_SC, ucp_Hiragana }, + { 508, PT_SC, ucp_Imperial_Aramaic }, + { 525, PT_SC, ucp_Inherited }, + { 535, PT_SC, ucp_Inscriptional_Pahlavi }, + { 557, PT_SC, ucp_Inscriptional_Parthian }, + { 580, PT_SC, ucp_Javanese }, + { 589, PT_SC, ucp_Kaithi }, + { 596, PT_SC, ucp_Kannada }, + { 604, PT_SC, ucp_Katakana }, + { 613, PT_SC, ucp_Kayah_Li }, + { 622, PT_SC, ucp_Kharoshthi }, + { 633, PT_SC, ucp_Khitan_Small_Script }, + { 653, PT_SC, ucp_Khmer }, + { 659, PT_SC, ucp_Khojki }, + { 666, PT_SC, ucp_Khudawadi }, + { 676, PT_GC, ucp_L }, + { 678, PT_LAMP, 0 }, + { 681, PT_SC, ucp_Lao }, + { 685, PT_SC, ucp_Latin }, + { 691, PT_SC, ucp_Lepcha }, + { 698, PT_SC, ucp_Limbu }, + { 704, PT_SC, ucp_Linear_A }, + { 713, PT_SC, ucp_Linear_B }, + { 722, PT_SC, ucp_Lisu }, + { 727, PT_PC, ucp_Ll }, + { 730, PT_PC, ucp_Lm }, + { 733, PT_PC, ucp_Lo }, + { 736, PT_PC, ucp_Lt }, + { 739, PT_PC, ucp_Lu }, + { 742, PT_SC, ucp_Lycian }, + { 749, PT_SC, ucp_Lydian }, + { 756, PT_GC, ucp_M }, + { 758, PT_SC, ucp_Mahajani }, + { 767, PT_SC, ucp_Makasar }, + { 775, PT_SC, ucp_Malayalam }, + { 785, PT_SC, ucp_Mandaic }, + { 793, PT_SC, ucp_Manichaean }, + { 804, PT_SC, ucp_Marchen }, + { 812, PT_SC, ucp_Masaram_Gondi }, + { 826, PT_PC, ucp_Mc }, + { 829, PT_PC, ucp_Me }, + { 832, PT_SC, ucp_Medefaidrin }, + { 844, PT_SC, ucp_Meetei_Mayek }, + { 857, PT_SC, ucp_Mende_Kikakui }, + { 871, PT_SC, ucp_Meroitic_Cursive }, + { 888, PT_SC, ucp_Meroitic_Hieroglyphs }, + { 909, PT_SC, ucp_Miao }, + { 914, PT_PC, ucp_Mn }, + { 917, PT_SC, ucp_Modi }, + { 922, PT_SC, ucp_Mongolian }, + { 932, PT_SC, ucp_Mro }, + { 936, PT_SC, ucp_Multani }, + { 944, PT_SC, ucp_Myanmar }, + { 952, PT_GC, ucp_N }, + { 954, PT_SC, ucp_Nabataean }, + { 964, PT_SC, ucp_Nandinagari }, + { 976, PT_PC, ucp_Nd }, + { 979, PT_SC, ucp_New_Tai_Lue }, + { 991, PT_SC, ucp_Newa }, + { 996, PT_SC, ucp_Nko }, + { 1000, PT_PC, ucp_Nl }, + { 1003, PT_PC, ucp_No }, + { 1006, PT_SC, ucp_Nushu }, + { 1012, PT_SC, ucp_Nyiakeng_Puachue_Hmong }, + { 1035, PT_SC, ucp_Ogham }, + { 1041, PT_SC, ucp_Ol_Chiki }, + { 1050, PT_SC, ucp_Old_Hungarian }, + { 1064, PT_SC, ucp_Old_Italic }, + { 1075, PT_SC, ucp_Old_North_Arabian }, + { 1093, PT_SC, ucp_Old_Permic }, + { 1104, PT_SC, ucp_Old_Persian }, + { 1116, PT_SC, ucp_Old_Sogdian }, + { 1128, PT_SC, ucp_Old_South_Arabian }, + { 1146, PT_SC, ucp_Old_Turkic }, + { 1157, PT_SC, ucp_Oriya }, + { 1163, PT_SC, ucp_Osage }, + { 1169, PT_SC, ucp_Osmanya }, + { 1177, PT_GC, ucp_P }, + { 1179, PT_SC, ucp_Pahawh_Hmong }, + { 1192, PT_SC, ucp_Palmyrene }, + { 1202, PT_SC, ucp_Pau_Cin_Hau }, + { 1214, PT_PC, ucp_Pc }, + { 1217, PT_PC, ucp_Pd }, + { 1220, PT_PC, ucp_Pe }, + { 1223, PT_PC, ucp_Pf }, + { 1226, PT_SC, ucp_Phags_Pa }, + { 1235, PT_SC, ucp_Phoenician }, + { 1246, PT_PC, ucp_Pi }, + { 1249, PT_PC, ucp_Po }, + { 1252, PT_PC, ucp_Ps }, + { 1255, PT_SC, ucp_Psalter_Pahlavi }, + { 1271, PT_SC, ucp_Rejang }, + { 1278, PT_SC, ucp_Runic }, + { 1284, PT_GC, ucp_S }, + { 1286, PT_SC, ucp_Samaritan }, + { 1296, PT_SC, ucp_Saurashtra }, + { 1307, PT_PC, ucp_Sc }, + { 1310, PT_SC, ucp_Sharada }, + { 1318, PT_SC, ucp_Shavian }, + { 1326, PT_SC, ucp_Siddham }, + { 1334, PT_SC, ucp_SignWriting }, + { 1346, PT_SC, ucp_Sinhala }, + { 1354, PT_PC, ucp_Sk }, + { 1357, PT_PC, ucp_Sm }, + { 1360, PT_PC, ucp_So }, + { 1363, PT_SC, ucp_Sogdian }, + { 1371, PT_SC, ucp_Sora_Sompeng }, + { 1384, PT_SC, ucp_Soyombo }, + { 1392, PT_SC, ucp_Sundanese }, + { 1402, PT_SC, ucp_Syloti_Nagri }, + { 1415, PT_SC, ucp_Syriac }, + { 1422, PT_SC, ucp_Tagalog }, + { 1430, PT_SC, ucp_Tagbanwa }, + { 1439, PT_SC, ucp_Tai_Le }, + { 1446, PT_SC, ucp_Tai_Tham }, + { 1455, PT_SC, ucp_Tai_Viet }, + { 1464, PT_SC, ucp_Takri }, + { 1470, PT_SC, ucp_Tamil }, + { 1476, PT_SC, ucp_Tangut }, + { 1483, PT_SC, ucp_Telugu }, + { 1490, PT_SC, ucp_Thaana }, + { 1497, PT_SC, ucp_Thai }, + { 1502, PT_SC, ucp_Tibetan }, + { 1510, PT_SC, ucp_Tifinagh }, + { 1519, PT_SC, ucp_Tirhuta }, + { 1527, PT_SC, ucp_Ugaritic }, + { 1536, PT_SC, ucp_Unknown }, + { 1544, PT_SC, ucp_Vai }, + { 1548, PT_SC, ucp_Wancho }, + { 1555, PT_SC, ucp_Warang_Citi }, + { 1567, PT_ALNUM, 0 }, + { 1571, PT_PXSPACE, 0 }, + { 1575, PT_SPACE, 0 }, + { 1579, PT_UCNC, 0 }, + { 1583, PT_WORD, 0 }, + { 1587, PT_SC, ucp_Yezidi }, + { 1594, PT_SC, ucp_Yi }, + { 1597, PT_GC, ucp_Z }, + { 1599, PT_SC, ucp_Zanabazar_Square }, + { 1616, PT_PC, ucp_Zl }, + { 1619, PT_PC, ucp_Zp }, + { 1622, PT_PC, ucp_Zs } }; const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); |