diff options
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_ucp.h')
-rw-r--r-- | thirdparty/pcre2/src/pcre2_ucp.h | 353 |
1 files changed, 221 insertions, 132 deletions
diff --git a/thirdparty/pcre2/src/pcre2_ucp.h b/thirdparty/pcre2/src/pcre2_ucp.h index d84f269e87..282238982d 100644 --- a/thirdparty/pcre2/src/pcre2_ucp.h +++ b/thirdparty/pcre2/src/pcre2_ucp.h @@ -7,7 +7,11 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2018 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge + +This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY! +Instead, modify the maint/GenerateUcpHeader.py script and run it to generate +a new version of this code. ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -38,31 +42,27 @@ POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ - #ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD #define PCRE2_UCP_H_IDEMPOTENT_GUARD -/* This file contains definitions of the property values that are returned by -the UCD access macros. New values that are added for new releases of Unicode -should always be at the end of each enum, for backwards compatibility. +/* This file contains definitions of the Unicode property values that are +returned by the UCD access macros and used throughout PCRE2. -IMPORTANT: Note also that the specific numeric values of the enums have to be -the same as the values that are generated by the maint/MultiStage2.py script, -where the equivalent property descriptive names are listed in vectors. - -ALSO: The specific values of the first two enums are assumed for the table -called catposstab in pcre2_compile.c. */ +IMPORTANT: The specific values of the first two enums (general and particular +character categories) are assumed by the table called catposstab in the file +pcre2_auto_possess.c. They are unlikely to change, but should be checked after +an update. */ /* These are the general character categories. */ enum { - ucp_C, /* Other */ - ucp_L, /* Letter */ - ucp_M, /* Mark */ - ucp_N, /* Number */ - ucp_P, /* Punctuation */ - ucp_S, /* Symbol */ - ucp_Z /* Separator */ + ucp_C, + ucp_L, + ucp_M, + ucp_N, + ucp_P, + ucp_S, + ucp_Z, }; /* These are the particular character categories. */ @@ -97,7 +97,98 @@ enum { ucp_So, /* Other symbol */ ucp_Zl, /* Line separator */ ucp_Zp, /* Paragraph separator */ - ucp_Zs /* Space separator */ + ucp_Zs, /* Space separator */ +}; + +/* These are Boolean properties. */ + +enum { + ucp_ASCII, + ucp_ASCII_Hex_Digit, + ucp_Alphabetic, + ucp_Bidi_Control, + ucp_Bidi_Mirrored, + ucp_Case_Ignorable, + ucp_Cased, + ucp_Changes_When_Casefolded, + ucp_Changes_When_Casemapped, + ucp_Changes_When_Lowercased, + ucp_Changes_When_Titlecased, + ucp_Changes_When_Uppercased, + ucp_Dash, + ucp_Default_Ignorable_Code_Point, + ucp_Deprecated, + ucp_Diacritic, + ucp_Emoji, + ucp_Emoji_Component, + ucp_Emoji_Modifier, + ucp_Emoji_Modifier_Base, + ucp_Emoji_Presentation, + ucp_Extended_Pictographic, + ucp_Extender, + ucp_Grapheme_Base, + ucp_Grapheme_Extend, + ucp_Grapheme_Link, + ucp_Hex_Digit, + ucp_IDS_Binary_Operator, + ucp_IDS_Trinary_Operator, + ucp_ID_Continue, + ucp_ID_Start, + ucp_Ideographic, + ucp_Join_Control, + ucp_Logical_Order_Exception, + ucp_Lowercase, + ucp_Math, + ucp_Noncharacter_Code_Point, + ucp_Pattern_Syntax, + ucp_Pattern_White_Space, + ucp_Prepended_Concatenation_Mark, + ucp_Quotation_Mark, + ucp_Radical, + ucp_Regional_Indicator, + ucp_Sentence_Terminal, + ucp_Soft_Dotted, + ucp_Terminal_Punctuation, + ucp_Unified_Ideograph, + ucp_Uppercase, + ucp_Variation_Selector, + ucp_White_Space, + ucp_XID_Continue, + ucp_XID_Start, + /* This must be last */ + ucp_Bprop_Count +}; + +/* Size of entries in ucd_boolprop_sets[] */ + +#define ucd_boolprop_sets_item_size 2 + +/* These are the bidi class values. */ + +enum { + ucp_bidiAL, /* Arabic letter */ + ucp_bidiAN, /* Arabic number */ + ucp_bidiB, /* Paragraph separator */ + ucp_bidiBN, /* Boundary neutral */ + ucp_bidiCS, /* Common separator */ + ucp_bidiEN, /* European number */ + ucp_bidiES, /* European separator */ + ucp_bidiET, /* European terminator */ + ucp_bidiFSI, /* First strong isolate */ + ucp_bidiL, /* Left to right */ + ucp_bidiLRE, /* Left to right embedding */ + ucp_bidiLRI, /* Left to right isolate */ + ucp_bidiLRO, /* Left to right override */ + ucp_bidiNSM, /* Non-spacing mark */ + ucp_bidiON, /* Other neutral */ + ucp_bidiPDF, /* Pop directional format */ + ucp_bidiPDI, /* Pop directional isolate */ + ucp_bidiR, /* Right to left */ + ucp_bidiRLE, /* Right to left embedding */ + ucp_bidiRLI, /* Right to left isolate */ + ucp_bidiRLO, /* Right to left override */ + ucp_bidiS, /* Segment separator */ + ucp_bidiWS, /* White space */ }; /* These are grapheme break properties. The Extended Pictographic property @@ -115,191 +206,189 @@ enum { ucp_gbT, /* 8 Hangul syllable type T */ ucp_gbLV, /* 9 Hangul syllable type LV */ ucp_gbLVT, /* 10 Hangul syllable type LVT */ - ucp_gbRegionalIndicator, /* 11 */ + ucp_gbRegional_Indicator, /* 11 */ ucp_gbOther, /* 12 */ ucp_gbZWJ, /* 13 */ - ucp_gbExtended_Pictographic /* 14 */ + ucp_gbExtended_Pictographic, /* 14 */ }; /* These are the script identifications. */ enum { - ucp_Unknown, - ucp_Arabic, - ucp_Armenian, - ucp_Bengali, - ucp_Bopomofo, - ucp_Braille, - ucp_Buginese, - ucp_Buhid, - ucp_Canadian_Aboriginal, - ucp_Cherokee, - ucp_Common, - ucp_Coptic, - ucp_Cypriot, + /* Scripts which has characters in other scripts. */ + ucp_Latin, + ucp_Greek, ucp_Cyrillic, - ucp_Deseret, + ucp_Arabic, + ucp_Syriac, + ucp_Thaana, ucp_Devanagari, - ucp_Ethiopic, - ucp_Georgian, - ucp_Glagolitic, - ucp_Gothic, - ucp_Greek, - ucp_Gujarati, + ucp_Bengali, ucp_Gurmukhi, - ucp_Han, - ucp_Hangul, - ucp_Hanunoo, - ucp_Hebrew, - ucp_Hiragana, - ucp_Inherited, + ucp_Gujarati, + ucp_Oriya, + ucp_Tamil, + ucp_Telugu, ucp_Kannada, - ucp_Katakana, - ucp_Kharoshthi, - ucp_Khmer, - ucp_Lao, - ucp_Latin, - ucp_Limbu, - ucp_Linear_B, ucp_Malayalam, - ucp_Mongolian, - ucp_Myanmar, - ucp_New_Tai_Lue, - ucp_Ogham, - ucp_Old_Italic, - ucp_Old_Persian, - ucp_Oriya, - ucp_Osmanya, - ucp_Runic, - ucp_Shavian, ucp_Sinhala, - ucp_Syloti_Nagri, - ucp_Syriac, + ucp_Myanmar, + ucp_Georgian, + ucp_Hangul, + ucp_Mongolian, + ucp_Hiragana, + ucp_Katakana, + ucp_Bopomofo, + ucp_Han, + ucp_Yi, ucp_Tagalog, + ucp_Hanunoo, + ucp_Buhid, ucp_Tagbanwa, + ucp_Limbu, ucp_Tai_Le, - ucp_Tamil, - ucp_Telugu, - ucp_Thaana, + ucp_Linear_B, + ucp_Cypriot, + ucp_Buginese, + ucp_Coptic, + ucp_Glagolitic, + ucp_Syloti_Nagri, + ucp_Phags_Pa, + ucp_Nko, + ucp_Kayah_Li, + ucp_Javanese, + ucp_Kaithi, + ucp_Mandaic, + ucp_Chakma, + ucp_Sharada, + ucp_Takri, + ucp_Duployan, + ucp_Grantha, + ucp_Khojki, + ucp_Linear_A, + ucp_Mahajani, + ucp_Manichaean, + ucp_Modi, + ucp_Old_Permic, + ucp_Psalter_Pahlavi, + ucp_Khudawadi, + ucp_Tirhuta, + ucp_Multani, + ucp_Adlam, + ucp_Masaram_Gondi, + ucp_Dogra, + ucp_Gunjala_Gondi, + ucp_Hanifi_Rohingya, + ucp_Sogdian, + ucp_Nandinagari, + ucp_Yezidi, + ucp_Cypro_Minoan, + ucp_Old_Uyghur, + + /* Scripts which has no characters in other scripts. */ + ucp_Unknown, + ucp_Common, + ucp_Armenian, + ucp_Hebrew, ucp_Thai, + ucp_Lao, ucp_Tibetan, - ucp_Tifinagh, + ucp_Ethiopic, + ucp_Cherokee, + ucp_Canadian_Aboriginal, + ucp_Ogham, + ucp_Runic, + ucp_Khmer, + ucp_Old_Italic, + ucp_Gothic, + ucp_Deseret, + ucp_Inherited, ucp_Ugaritic, - ucp_Yi, - /* New for Unicode 5.0 */ + ucp_Shavian, + ucp_Osmanya, + ucp_Braille, + ucp_New_Tai_Lue, + ucp_Tifinagh, + ucp_Old_Persian, + ucp_Kharoshthi, ucp_Balinese, ucp_Cuneiform, - ucp_Nko, - ucp_Phags_Pa, ucp_Phoenician, - /* New for Unicode 5.1 */ - ucp_Carian, - ucp_Cham, - ucp_Kayah_Li, + ucp_Sundanese, ucp_Lepcha, - ucp_Lycian, - ucp_Lydian, ucp_Ol_Chiki, - ucp_Rejang, - ucp_Saurashtra, - ucp_Sundanese, ucp_Vai, - /* New for Unicode 5.2 */ + ucp_Saurashtra, + ucp_Rejang, + ucp_Lycian, + ucp_Carian, + ucp_Lydian, + ucp_Cham, + ucp_Tai_Tham, + ucp_Tai_Viet, ucp_Avestan, - ucp_Bamum, ucp_Egyptian_Hieroglyphs, - ucp_Imperial_Aramaic, - ucp_Inscriptional_Pahlavi, - ucp_Inscriptional_Parthian, - ucp_Javanese, - ucp_Kaithi, + ucp_Samaritan, ucp_Lisu, + ucp_Bamum, ucp_Meetei_Mayek, + ucp_Imperial_Aramaic, ucp_Old_South_Arabian, + ucp_Inscriptional_Parthian, + ucp_Inscriptional_Pahlavi, ucp_Old_Turkic, - ucp_Samaritan, - ucp_Tai_Tham, - ucp_Tai_Viet, - /* New for Unicode 6.0.0 */ ucp_Batak, ucp_Brahmi, - ucp_Mandaic, - /* New for Unicode 6.1.0 */ - ucp_Chakma, ucp_Meroitic_Cursive, ucp_Meroitic_Hieroglyphs, ucp_Miao, - ucp_Sharada, ucp_Sora_Sompeng, - ucp_Takri, - /* New for Unicode 7.0.0 */ - ucp_Bassa_Vah, ucp_Caucasian_Albanian, - ucp_Duployan, + ucp_Bassa_Vah, ucp_Elbasan, - ucp_Grantha, - ucp_Khojki, - ucp_Khudawadi, - ucp_Linear_A, - ucp_Mahajani, - ucp_Manichaean, + ucp_Pahawh_Hmong, ucp_Mende_Kikakui, - ucp_Modi, ucp_Mro, - ucp_Nabataean, ucp_Old_North_Arabian, - ucp_Old_Permic, - ucp_Pahawh_Hmong, + ucp_Nabataean, ucp_Palmyrene, - ucp_Psalter_Pahlavi, ucp_Pau_Cin_Hau, ucp_Siddham, - ucp_Tirhuta, ucp_Warang_Citi, - /* New for Unicode 8.0.0 */ ucp_Ahom, ucp_Anatolian_Hieroglyphs, ucp_Hatran, - ucp_Multani, ucp_Old_Hungarian, ucp_SignWriting, - /* New for Unicode 10.0.0 (no update since 8.0.0) */ - ucp_Adlam, ucp_Bhaiksuki, ucp_Marchen, ucp_Newa, ucp_Osage, ucp_Tangut, - ucp_Masaram_Gondi, ucp_Nushu, ucp_Soyombo, ucp_Zanabazar_Square, - /* New for Unicode 11.0.0 */ - ucp_Dogra, - ucp_Gunjala_Gondi, - ucp_Hanifi_Rohingya, ucp_Makasar, ucp_Medefaidrin, ucp_Old_Sogdian, - ucp_Sogdian, - /* New for Unicode 12.0.0 */ ucp_Elymaic, - ucp_Nandinagari, ucp_Nyiakeng_Puachue_Hmong, ucp_Wancho, - /* New for Unicode 13.0.0 */ ucp_Chorasmian, ucp_Dives_Akuru, ucp_Khitan_Small_Script, - ucp_Yezidi, - /* New for Unicode 14.0.0 */ - ucp_Cypro_Minoan, - ucp_Old_Uyghur, ucp_Tangsa, ucp_Toto, - ucp_Vithkuqi + ucp_Vithkuqi, + + /* This must be last */ + ucp_Script_Count }; +/* Size of entries in ucd_script_sets[] */ + +#define ucd_script_sets_item_size 3 + #endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */ /* End of pcre2_ucp.h */ |