diff options
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_ucp.h')
| -rw-r--r-- | thirdparty/pcre2/src/pcre2_ucp.h | 353 | 
1 files changed, 221 insertions, 132 deletions
| diff --git a/thirdparty/pcre2/src/pcre2_ucp.h b/thirdparty/pcre2/src/pcre2_ucp.h index d84f269e87..282238982d 100644 --- a/thirdparty/pcre2/src/pcre2_ucp.h +++ b/thirdparty/pcre2/src/pcre2_ucp.h @@ -7,7 +7,11 @@ and semantics are as close as possible to those of the Perl 5 language.                         Written by Philip Hazel       Original API code Copyright (c) 1997-2012 University of Cambridge -          New API code Copyright (c) 2016-2018 University of Cambridge +          New API code Copyright (c) 2016-2022 University of Cambridge + +This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY! +Instead, modify the maint/GenerateUcpHeader.py script and run it to generate +a new version of this code.  -----------------------------------------------------------------------------  Redistribution and use in source and binary forms, with or without @@ -38,31 +42,27 @@ POSSIBILITY OF SUCH DAMAGE.  -----------------------------------------------------------------------------  */ -  #ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD  #define PCRE2_UCP_H_IDEMPOTENT_GUARD -/* This file contains definitions of the property values that are returned by -the UCD access macros. New values that are added for new releases of Unicode -should always be at the end of each enum, for backwards compatibility. +/* This file contains definitions of the Unicode property values that are +returned by the UCD access macros and used throughout PCRE2. -IMPORTANT: Note also that the specific numeric values of the enums have to be -the same as the values that are generated by the maint/MultiStage2.py script, -where the equivalent property descriptive names are listed in vectors. - -ALSO: The specific values of the first two enums are assumed for the table -called catposstab in pcre2_compile.c. */ +IMPORTANT: The specific values of the first two enums (general and particular +character categories) are assumed by the table called catposstab in the file +pcre2_auto_possess.c. 
They are unlikely to change, but should be checked after +an update. */  /* These are the general character categories. */  enum { -  ucp_C,     /* Other */ -  ucp_L,     /* Letter */ -  ucp_M,     /* Mark */ -  ucp_N,     /* Number */ -  ucp_P,     /* Punctuation */ -  ucp_S,     /* Symbol */ -  ucp_Z      /* Separator */ +  ucp_C, +  ucp_L, +  ucp_M, +  ucp_N, +  ucp_P, +  ucp_S, +  ucp_Z,  };  /* These are the particular character categories. */ @@ -97,7 +97,98 @@ enum {    ucp_So,    /* Other symbol */    ucp_Zl,    /* Line separator */    ucp_Zp,    /* Paragraph separator */ -  ucp_Zs     /* Space separator */ +  ucp_Zs,    /* Space separator */ +}; + +/* These are Boolean properties. */ + +enum { +  ucp_ASCII, +  ucp_ASCII_Hex_Digit, +  ucp_Alphabetic, +  ucp_Bidi_Control, +  ucp_Bidi_Mirrored, +  ucp_Case_Ignorable, +  ucp_Cased, +  ucp_Changes_When_Casefolded, +  ucp_Changes_When_Casemapped, +  ucp_Changes_When_Lowercased, +  ucp_Changes_When_Titlecased, +  ucp_Changes_When_Uppercased, +  ucp_Dash, +  ucp_Default_Ignorable_Code_Point, +  ucp_Deprecated, +  ucp_Diacritic, +  ucp_Emoji, +  ucp_Emoji_Component, +  ucp_Emoji_Modifier, +  ucp_Emoji_Modifier_Base, +  ucp_Emoji_Presentation, +  ucp_Extended_Pictographic, +  ucp_Extender, +  ucp_Grapheme_Base, +  ucp_Grapheme_Extend, +  ucp_Grapheme_Link, +  ucp_Hex_Digit, +  ucp_IDS_Binary_Operator, +  ucp_IDS_Trinary_Operator, +  ucp_ID_Continue, +  ucp_ID_Start, +  ucp_Ideographic, +  ucp_Join_Control, +  ucp_Logical_Order_Exception, +  ucp_Lowercase, +  ucp_Math, +  ucp_Noncharacter_Code_Point, +  ucp_Pattern_Syntax, +  ucp_Pattern_White_Space, +  ucp_Prepended_Concatenation_Mark, +  ucp_Quotation_Mark, +  ucp_Radical, +  ucp_Regional_Indicator, +  ucp_Sentence_Terminal, +  ucp_Soft_Dotted, +  ucp_Terminal_Punctuation, +  ucp_Unified_Ideograph, +  ucp_Uppercase, +  ucp_Variation_Selector, +  ucp_White_Space, +  ucp_XID_Continue, +  ucp_XID_Start, +  /* This must be last */ +  ucp_Bprop_Count +}; + +/* Size of 
entries in ucd_boolprop_sets[] */ + +#define ucd_boolprop_sets_item_size 2 + +/* These are the bidi class values. */ + +enum { +  ucp_bidiAL,   /* Arabic letter */ +  ucp_bidiAN,   /* Arabic number */ +  ucp_bidiB,    /* Paragraph separator */ +  ucp_bidiBN,   /* Boundary neutral */ +  ucp_bidiCS,   /* Common separator */ +  ucp_bidiEN,   /* European number */ +  ucp_bidiES,   /* European separator */ +  ucp_bidiET,   /* European terminator */ +  ucp_bidiFSI,  /* First strong isolate */ +  ucp_bidiL,    /* Left to right */ +  ucp_bidiLRE,  /* Left to right embedding */ +  ucp_bidiLRI,  /* Left to right isolate */ +  ucp_bidiLRO,  /* Left to right override */ +  ucp_bidiNSM,  /* Non-spacing mark */ +  ucp_bidiON,   /* Other neutral */ +  ucp_bidiPDF,  /* Pop directional format */ +  ucp_bidiPDI,  /* Pop directional isolate */ +  ucp_bidiR,    /* Right to left */ +  ucp_bidiRLE,  /* Right to left embedding */ +  ucp_bidiRLI,  /* Right to left isolate */ +  ucp_bidiRLO,  /* Right to left override */ +  ucp_bidiS,    /* Segment separator */ +  ucp_bidiWS,   /* White space */  };  /* These are grapheme break properties. The Extended Pictographic property @@ -115,191 +206,189 @@ enum {    ucp_gbT,                     /*  8 Hangul syllable type T */    ucp_gbLV,                    /*  9 Hangul syllable type LV */    ucp_gbLVT,                   /* 10 Hangul syllable type LVT */ -  ucp_gbRegionalIndicator,     /* 11 */ +  ucp_gbRegional_Indicator,    /* 11 */    ucp_gbOther,                 /* 12 */    ucp_gbZWJ,                   /* 13 */ -  ucp_gbExtended_Pictographic  /* 14 */ +  ucp_gbExtended_Pictographic, /* 14 */  };  /* These are the script identifications. */  enum { -  ucp_Unknown, -  ucp_Arabic, -  ucp_Armenian, -  ucp_Bengali, -  ucp_Bopomofo, -  ucp_Braille, -  ucp_Buginese, -  ucp_Buhid, -  ucp_Canadian_Aboriginal, -  ucp_Cherokee, -  ucp_Common, -  ucp_Coptic, -  ucp_Cypriot, +  /* Scripts which have characters in other scripts. 
*/ +  ucp_Latin, +  ucp_Greek,    ucp_Cyrillic, -  ucp_Deseret, +  ucp_Arabic, +  ucp_Syriac, +  ucp_Thaana,    ucp_Devanagari, -  ucp_Ethiopic, -  ucp_Georgian, -  ucp_Glagolitic, -  ucp_Gothic, -  ucp_Greek, -  ucp_Gujarati, +  ucp_Bengali,    ucp_Gurmukhi, -  ucp_Han, -  ucp_Hangul, -  ucp_Hanunoo, -  ucp_Hebrew, -  ucp_Hiragana, -  ucp_Inherited, +  ucp_Gujarati, +  ucp_Oriya, +  ucp_Tamil, +  ucp_Telugu,    ucp_Kannada, -  ucp_Katakana, -  ucp_Kharoshthi, -  ucp_Khmer, -  ucp_Lao, -  ucp_Latin, -  ucp_Limbu, -  ucp_Linear_B,    ucp_Malayalam, -  ucp_Mongolian, -  ucp_Myanmar, -  ucp_New_Tai_Lue, -  ucp_Ogham, -  ucp_Old_Italic, -  ucp_Old_Persian, -  ucp_Oriya, -  ucp_Osmanya, -  ucp_Runic, -  ucp_Shavian,    ucp_Sinhala, -  ucp_Syloti_Nagri, -  ucp_Syriac, +  ucp_Myanmar, +  ucp_Georgian, +  ucp_Hangul, +  ucp_Mongolian, +  ucp_Hiragana, +  ucp_Katakana, +  ucp_Bopomofo, +  ucp_Han, +  ucp_Yi,    ucp_Tagalog, +  ucp_Hanunoo, +  ucp_Buhid,    ucp_Tagbanwa, +  ucp_Limbu,    ucp_Tai_Le, -  ucp_Tamil, -  ucp_Telugu, -  ucp_Thaana, +  ucp_Linear_B, +  ucp_Cypriot, +  ucp_Buginese, +  ucp_Coptic, +  ucp_Glagolitic, +  ucp_Syloti_Nagri, +  ucp_Phags_Pa, +  ucp_Nko, +  ucp_Kayah_Li, +  ucp_Javanese, +  ucp_Kaithi, +  ucp_Mandaic, +  ucp_Chakma, +  ucp_Sharada, +  ucp_Takri, +  ucp_Duployan, +  ucp_Grantha, +  ucp_Khojki, +  ucp_Linear_A, +  ucp_Mahajani, +  ucp_Manichaean, +  ucp_Modi, +  ucp_Old_Permic, +  ucp_Psalter_Pahlavi, +  ucp_Khudawadi, +  ucp_Tirhuta, +  ucp_Multani, +  ucp_Adlam, +  ucp_Masaram_Gondi, +  ucp_Dogra, +  ucp_Gunjala_Gondi, +  ucp_Hanifi_Rohingya, +  ucp_Sogdian, +  ucp_Nandinagari, +  ucp_Yezidi, +  ucp_Cypro_Minoan, +  ucp_Old_Uyghur, + +  /* Scripts which have no characters in other scripts. 
*/ +  ucp_Unknown, +  ucp_Common, +  ucp_Armenian, +  ucp_Hebrew,    ucp_Thai, +  ucp_Lao,    ucp_Tibetan, -  ucp_Tifinagh, +  ucp_Ethiopic, +  ucp_Cherokee, +  ucp_Canadian_Aboriginal, +  ucp_Ogham, +  ucp_Runic, +  ucp_Khmer, +  ucp_Old_Italic, +  ucp_Gothic, +  ucp_Deseret, +  ucp_Inherited,    ucp_Ugaritic, -  ucp_Yi, -  /* New for Unicode 5.0 */ +  ucp_Shavian, +  ucp_Osmanya, +  ucp_Braille, +  ucp_New_Tai_Lue, +  ucp_Tifinagh, +  ucp_Old_Persian, +  ucp_Kharoshthi,    ucp_Balinese,    ucp_Cuneiform, -  ucp_Nko, -  ucp_Phags_Pa,    ucp_Phoenician, -  /* New for Unicode 5.1 */ -  ucp_Carian, -  ucp_Cham, -  ucp_Kayah_Li, +  ucp_Sundanese,    ucp_Lepcha, -  ucp_Lycian, -  ucp_Lydian,    ucp_Ol_Chiki, -  ucp_Rejang, -  ucp_Saurashtra, -  ucp_Sundanese,    ucp_Vai, -  /* New for Unicode 5.2 */ +  ucp_Saurashtra, +  ucp_Rejang, +  ucp_Lycian, +  ucp_Carian, +  ucp_Lydian, +  ucp_Cham, +  ucp_Tai_Tham, +  ucp_Tai_Viet,    ucp_Avestan, -  ucp_Bamum,    ucp_Egyptian_Hieroglyphs, -  ucp_Imperial_Aramaic, -  ucp_Inscriptional_Pahlavi, -  ucp_Inscriptional_Parthian, -  ucp_Javanese, -  ucp_Kaithi, +  ucp_Samaritan,    ucp_Lisu, +  ucp_Bamum,    ucp_Meetei_Mayek, +  ucp_Imperial_Aramaic,    ucp_Old_South_Arabian, +  ucp_Inscriptional_Parthian, +  ucp_Inscriptional_Pahlavi,    ucp_Old_Turkic, -  ucp_Samaritan, -  ucp_Tai_Tham, -  ucp_Tai_Viet, -  /* New for Unicode 6.0.0 */    ucp_Batak,    ucp_Brahmi, -  ucp_Mandaic, -  /* New for Unicode 6.1.0 */ -  ucp_Chakma,    ucp_Meroitic_Cursive,    ucp_Meroitic_Hieroglyphs,    ucp_Miao, -  ucp_Sharada,    ucp_Sora_Sompeng, -  ucp_Takri, -  /* New for Unicode 7.0.0 */ -  ucp_Bassa_Vah,    ucp_Caucasian_Albanian, -  ucp_Duployan, +  ucp_Bassa_Vah,    ucp_Elbasan, -  ucp_Grantha, -  ucp_Khojki, -  ucp_Khudawadi, -  ucp_Linear_A, -  ucp_Mahajani, -  ucp_Manichaean, +  ucp_Pahawh_Hmong,    ucp_Mende_Kikakui, -  ucp_Modi,    ucp_Mro, -  ucp_Nabataean,    ucp_Old_North_Arabian, -  ucp_Old_Permic, -  ucp_Pahawh_Hmong, +  ucp_Nabataean,   
 ucp_Palmyrene, -  ucp_Psalter_Pahlavi,    ucp_Pau_Cin_Hau,    ucp_Siddham, -  ucp_Tirhuta,    ucp_Warang_Citi, -  /* New for Unicode 8.0.0 */    ucp_Ahom,    ucp_Anatolian_Hieroglyphs,    ucp_Hatran, -  ucp_Multani,    ucp_Old_Hungarian,    ucp_SignWriting, -  /* New for Unicode 10.0.0 (no update since 8.0.0) */ -  ucp_Adlam,    ucp_Bhaiksuki,    ucp_Marchen,    ucp_Newa,    ucp_Osage,    ucp_Tangut, -  ucp_Masaram_Gondi,    ucp_Nushu,    ucp_Soyombo,    ucp_Zanabazar_Square, -  /* New for Unicode 11.0.0 */ -  ucp_Dogra, -  ucp_Gunjala_Gondi, -  ucp_Hanifi_Rohingya,    ucp_Makasar,    ucp_Medefaidrin,    ucp_Old_Sogdian, -  ucp_Sogdian, -  /* New for Unicode 12.0.0 */    ucp_Elymaic, -  ucp_Nandinagari,    ucp_Nyiakeng_Puachue_Hmong,    ucp_Wancho, -  /* New for Unicode 13.0.0 */    ucp_Chorasmian,    ucp_Dives_Akuru,    ucp_Khitan_Small_Script, -  ucp_Yezidi, -  /* New for Unicode 14.0.0 */ -  ucp_Cypro_Minoan, -  ucp_Old_Uyghur,    ucp_Tangsa,    ucp_Toto, -  ucp_Vithkuqi +  ucp_Vithkuqi, + +  /* This must be last */ +  ucp_Script_Count  }; +/* Size of entries in ucd_script_sets[] */ + +#define ucd_script_sets_item_size 3 +  #endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */  /* End of pcre2_ucp.h */ |