summaryrefslogtreecommitdiff
path: root/thirdparty/pcre2/src/pcre2_ucp.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_ucp.h')
-rw-r--r--thirdparty/pcre2/src/pcre2_ucp.h353
1 files changed, 221 insertions, 132 deletions
diff --git a/thirdparty/pcre2/src/pcre2_ucp.h b/thirdparty/pcre2/src/pcre2_ucp.h
index d84f269e87..282238982d 100644
--- a/thirdparty/pcre2/src/pcre2_ucp.h
+++ b/thirdparty/pcre2/src/pcre2_ucp.h
@@ -7,7 +7,11 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2018 University of Cambridge
+ New API code Copyright (c) 2016-2022 University of Cambridge
+
+This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
+Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
+a new version of this code.
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -38,31 +42,27 @@ POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
-
#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
#define PCRE2_UCP_H_IDEMPOTENT_GUARD
-/* This file contains definitions of the property values that are returned by
-the UCD access macros. New values that are added for new releases of Unicode
-should always be at the end of each enum, for backwards compatibility.
+/* This file contains definitions of the Unicode property values that are
+returned by the UCD access macros and used throughout PCRE2.
-IMPORTANT: Note also that the specific numeric values of the enums have to be
-the same as the values that are generated by the maint/MultiStage2.py script,
-where the equivalent property descriptive names are listed in vectors.
-
-ALSO: The specific values of the first two enums are assumed for the table
-called catposstab in pcre2_compile.c. */
+IMPORTANT: The specific values of the first two enums (general and particular
+character categories) are assumed by the table called catposstab in the file
+pcre2_auto_possess.c. They are unlikely to change, but should be checked after
+an update. */
/* These are the general character categories. */
enum {
- ucp_C, /* Other */
- ucp_L, /* Letter */
- ucp_M, /* Mark */
- ucp_N, /* Number */
- ucp_P, /* Punctuation */
- ucp_S, /* Symbol */
- ucp_Z /* Separator */
+ ucp_C,
+ ucp_L,
+ ucp_M,
+ ucp_N,
+ ucp_P,
+ ucp_S,
+ ucp_Z,
};
/* These are the particular character categories. */
@@ -97,7 +97,98 @@ enum {
ucp_So, /* Other symbol */
ucp_Zl, /* Line separator */
ucp_Zp, /* Paragraph separator */
- ucp_Zs /* Space separator */
+ ucp_Zs, /* Space separator */
+};
+
+/* These are Boolean properties. */
+
+enum {
+ ucp_ASCII,
+ ucp_ASCII_Hex_Digit,
+ ucp_Alphabetic,
+ ucp_Bidi_Control,
+ ucp_Bidi_Mirrored,
+ ucp_Case_Ignorable,
+ ucp_Cased,
+ ucp_Changes_When_Casefolded,
+ ucp_Changes_When_Casemapped,
+ ucp_Changes_When_Lowercased,
+ ucp_Changes_When_Titlecased,
+ ucp_Changes_When_Uppercased,
+ ucp_Dash,
+ ucp_Default_Ignorable_Code_Point,
+ ucp_Deprecated,
+ ucp_Diacritic,
+ ucp_Emoji,
+ ucp_Emoji_Component,
+ ucp_Emoji_Modifier,
+ ucp_Emoji_Modifier_Base,
+ ucp_Emoji_Presentation,
+ ucp_Extended_Pictographic,
+ ucp_Extender,
+ ucp_Grapheme_Base,
+ ucp_Grapheme_Extend,
+ ucp_Grapheme_Link,
+ ucp_Hex_Digit,
+ ucp_IDS_Binary_Operator,
+ ucp_IDS_Trinary_Operator,
+ ucp_ID_Continue,
+ ucp_ID_Start,
+ ucp_Ideographic,
+ ucp_Join_Control,
+ ucp_Logical_Order_Exception,
+ ucp_Lowercase,
+ ucp_Math,
+ ucp_Noncharacter_Code_Point,
+ ucp_Pattern_Syntax,
+ ucp_Pattern_White_Space,
+ ucp_Prepended_Concatenation_Mark,
+ ucp_Quotation_Mark,
+ ucp_Radical,
+ ucp_Regional_Indicator,
+ ucp_Sentence_Terminal,
+ ucp_Soft_Dotted,
+ ucp_Terminal_Punctuation,
+ ucp_Unified_Ideograph,
+ ucp_Uppercase,
+ ucp_Variation_Selector,
+ ucp_White_Space,
+ ucp_XID_Continue,
+ ucp_XID_Start,
+ /* This must be last */
+ ucp_Bprop_Count
+};
+
+/* Size of entries in ucd_boolprop_sets[] */
+
+#define ucd_boolprop_sets_item_size 2
+
+/* These are the bidi class values. */
+
+enum {
+ ucp_bidiAL, /* Arabic letter */
+ ucp_bidiAN, /* Arabic number */
+ ucp_bidiB, /* Paragraph separator */
+ ucp_bidiBN, /* Boundary neutral */
+ ucp_bidiCS, /* Common separator */
+ ucp_bidiEN, /* European number */
+ ucp_bidiES, /* European separator */
+ ucp_bidiET, /* European terminator */
+ ucp_bidiFSI, /* First strong isolate */
+ ucp_bidiL, /* Left to right */
+ ucp_bidiLRE, /* Left to right embedding */
+ ucp_bidiLRI, /* Left to right isolate */
+ ucp_bidiLRO, /* Left to right override */
+ ucp_bidiNSM, /* Non-spacing mark */
+ ucp_bidiON, /* Other neutral */
+ ucp_bidiPDF, /* Pop directional format */
+ ucp_bidiPDI, /* Pop directional isolate */
+ ucp_bidiR, /* Right to left */
+ ucp_bidiRLE, /* Right to left embedding */
+ ucp_bidiRLI, /* Right to left isolate */
+ ucp_bidiRLO, /* Right to left override */
+ ucp_bidiS, /* Segment separator */
+ ucp_bidiWS, /* White space */
};
/* These are grapheme break properties. The Extended Pictographic property
@@ -115,191 +206,189 @@ enum {
ucp_gbT, /* 8 Hangul syllable type T */
ucp_gbLV, /* 9 Hangul syllable type LV */
ucp_gbLVT, /* 10 Hangul syllable type LVT */
- ucp_gbRegionalIndicator, /* 11 */
+ ucp_gbRegional_Indicator, /* 11 */
ucp_gbOther, /* 12 */
ucp_gbZWJ, /* 13 */
- ucp_gbExtended_Pictographic /* 14 */
+ ucp_gbExtended_Pictographic, /* 14 */
};
/* These are the script identifications. */
enum {
- ucp_Unknown,
- ucp_Arabic,
- ucp_Armenian,
- ucp_Bengali,
- ucp_Bopomofo,
- ucp_Braille,
- ucp_Buginese,
- ucp_Buhid,
- ucp_Canadian_Aboriginal,
- ucp_Cherokee,
- ucp_Common,
- ucp_Coptic,
- ucp_Cypriot,
+ /* Scripts which has characters in other scripts. */
+ ucp_Latin,
+ ucp_Greek,
ucp_Cyrillic,
- ucp_Deseret,
+ ucp_Arabic,
+ ucp_Syriac,
+ ucp_Thaana,
ucp_Devanagari,
- ucp_Ethiopic,
- ucp_Georgian,
- ucp_Glagolitic,
- ucp_Gothic,
- ucp_Greek,
- ucp_Gujarati,
+ ucp_Bengali,
ucp_Gurmukhi,
- ucp_Han,
- ucp_Hangul,
- ucp_Hanunoo,
- ucp_Hebrew,
- ucp_Hiragana,
- ucp_Inherited,
+ ucp_Gujarati,
+ ucp_Oriya,
+ ucp_Tamil,
+ ucp_Telugu,
ucp_Kannada,
- ucp_Katakana,
- ucp_Kharoshthi,
- ucp_Khmer,
- ucp_Lao,
- ucp_Latin,
- ucp_Limbu,
- ucp_Linear_B,
ucp_Malayalam,
- ucp_Mongolian,
- ucp_Myanmar,
- ucp_New_Tai_Lue,
- ucp_Ogham,
- ucp_Old_Italic,
- ucp_Old_Persian,
- ucp_Oriya,
- ucp_Osmanya,
- ucp_Runic,
- ucp_Shavian,
ucp_Sinhala,
- ucp_Syloti_Nagri,
- ucp_Syriac,
+ ucp_Myanmar,
+ ucp_Georgian,
+ ucp_Hangul,
+ ucp_Mongolian,
+ ucp_Hiragana,
+ ucp_Katakana,
+ ucp_Bopomofo,
+ ucp_Han,
+ ucp_Yi,
ucp_Tagalog,
+ ucp_Hanunoo,
+ ucp_Buhid,
ucp_Tagbanwa,
+ ucp_Limbu,
ucp_Tai_Le,
- ucp_Tamil,
- ucp_Telugu,
- ucp_Thaana,
+ ucp_Linear_B,
+ ucp_Cypriot,
+ ucp_Buginese,
+ ucp_Coptic,
+ ucp_Glagolitic,
+ ucp_Syloti_Nagri,
+ ucp_Phags_Pa,
+ ucp_Nko,
+ ucp_Kayah_Li,
+ ucp_Javanese,
+ ucp_Kaithi,
+ ucp_Mandaic,
+ ucp_Chakma,
+ ucp_Sharada,
+ ucp_Takri,
+ ucp_Duployan,
+ ucp_Grantha,
+ ucp_Khojki,
+ ucp_Linear_A,
+ ucp_Mahajani,
+ ucp_Manichaean,
+ ucp_Modi,
+ ucp_Old_Permic,
+ ucp_Psalter_Pahlavi,
+ ucp_Khudawadi,
+ ucp_Tirhuta,
+ ucp_Multani,
+ ucp_Adlam,
+ ucp_Masaram_Gondi,
+ ucp_Dogra,
+ ucp_Gunjala_Gondi,
+ ucp_Hanifi_Rohingya,
+ ucp_Sogdian,
+ ucp_Nandinagari,
+ ucp_Yezidi,
+ ucp_Cypro_Minoan,
+ ucp_Old_Uyghur,
+
+ /* Scripts which has no characters in other scripts. */
+ ucp_Unknown,
+ ucp_Common,
+ ucp_Armenian,
+ ucp_Hebrew,
ucp_Thai,
+ ucp_Lao,
ucp_Tibetan,
- ucp_Tifinagh,
+ ucp_Ethiopic,
+ ucp_Cherokee,
+ ucp_Canadian_Aboriginal,
+ ucp_Ogham,
+ ucp_Runic,
+ ucp_Khmer,
+ ucp_Old_Italic,
+ ucp_Gothic,
+ ucp_Deseret,
+ ucp_Inherited,
ucp_Ugaritic,
- ucp_Yi,
- /* New for Unicode 5.0 */
+ ucp_Shavian,
+ ucp_Osmanya,
+ ucp_Braille,
+ ucp_New_Tai_Lue,
+ ucp_Tifinagh,
+ ucp_Old_Persian,
+ ucp_Kharoshthi,
ucp_Balinese,
ucp_Cuneiform,
- ucp_Nko,
- ucp_Phags_Pa,
ucp_Phoenician,
- /* New for Unicode 5.1 */
- ucp_Carian,
- ucp_Cham,
- ucp_Kayah_Li,
+ ucp_Sundanese,
ucp_Lepcha,
- ucp_Lycian,
- ucp_Lydian,
ucp_Ol_Chiki,
- ucp_Rejang,
- ucp_Saurashtra,
- ucp_Sundanese,
ucp_Vai,
- /* New for Unicode 5.2 */
+ ucp_Saurashtra,
+ ucp_Rejang,
+ ucp_Lycian,
+ ucp_Carian,
+ ucp_Lydian,
+ ucp_Cham,
+ ucp_Tai_Tham,
+ ucp_Tai_Viet,
ucp_Avestan,
- ucp_Bamum,
ucp_Egyptian_Hieroglyphs,
- ucp_Imperial_Aramaic,
- ucp_Inscriptional_Pahlavi,
- ucp_Inscriptional_Parthian,
- ucp_Javanese,
- ucp_Kaithi,
+ ucp_Samaritan,
ucp_Lisu,
+ ucp_Bamum,
ucp_Meetei_Mayek,
+ ucp_Imperial_Aramaic,
ucp_Old_South_Arabian,
+ ucp_Inscriptional_Parthian,
+ ucp_Inscriptional_Pahlavi,
ucp_Old_Turkic,
- ucp_Samaritan,
- ucp_Tai_Tham,
- ucp_Tai_Viet,
- /* New for Unicode 6.0.0 */
ucp_Batak,
ucp_Brahmi,
- ucp_Mandaic,
- /* New for Unicode 6.1.0 */
- ucp_Chakma,
ucp_Meroitic_Cursive,
ucp_Meroitic_Hieroglyphs,
ucp_Miao,
- ucp_Sharada,
ucp_Sora_Sompeng,
- ucp_Takri,
- /* New for Unicode 7.0.0 */
- ucp_Bassa_Vah,
ucp_Caucasian_Albanian,
- ucp_Duployan,
+ ucp_Bassa_Vah,
ucp_Elbasan,
- ucp_Grantha,
- ucp_Khojki,
- ucp_Khudawadi,
- ucp_Linear_A,
- ucp_Mahajani,
- ucp_Manichaean,
+ ucp_Pahawh_Hmong,
ucp_Mende_Kikakui,
- ucp_Modi,
ucp_Mro,
- ucp_Nabataean,
ucp_Old_North_Arabian,
- ucp_Old_Permic,
- ucp_Pahawh_Hmong,
+ ucp_Nabataean,
ucp_Palmyrene,
- ucp_Psalter_Pahlavi,
ucp_Pau_Cin_Hau,
ucp_Siddham,
- ucp_Tirhuta,
ucp_Warang_Citi,
- /* New for Unicode 8.0.0 */
ucp_Ahom,
ucp_Anatolian_Hieroglyphs,
ucp_Hatran,
- ucp_Multani,
ucp_Old_Hungarian,
ucp_SignWriting,
- /* New for Unicode 10.0.0 (no update since 8.0.0) */
- ucp_Adlam,
ucp_Bhaiksuki,
ucp_Marchen,
ucp_Newa,
ucp_Osage,
ucp_Tangut,
- ucp_Masaram_Gondi,
ucp_Nushu,
ucp_Soyombo,
ucp_Zanabazar_Square,
- /* New for Unicode 11.0.0 */
- ucp_Dogra,
- ucp_Gunjala_Gondi,
- ucp_Hanifi_Rohingya,
ucp_Makasar,
ucp_Medefaidrin,
ucp_Old_Sogdian,
- ucp_Sogdian,
- /* New for Unicode 12.0.0 */
ucp_Elymaic,
- ucp_Nandinagari,
ucp_Nyiakeng_Puachue_Hmong,
ucp_Wancho,
- /* New for Unicode 13.0.0 */
ucp_Chorasmian,
ucp_Dives_Akuru,
ucp_Khitan_Small_Script,
- ucp_Yezidi,
- /* New for Unicode 14.0.0 */
- ucp_Cypro_Minoan,
- ucp_Old_Uyghur,
ucp_Tangsa,
ucp_Toto,
- ucp_Vithkuqi
+ ucp_Vithkuqi,
+
+ /* This must be last */
+ ucp_Script_Count
};
+/* Size of entries in ucd_script_sets[] */
+
+#define ucd_script_sets_item_size 3
+
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
/* End of pcre2_ucp.h */