1 files changed, 221 insertions, 132 deletions
diff --git a/thirdparty/pcre2/src/pcre2_ucp.h b/thirdparty/pcre2/src/pcre2_ucp.h
index d84f269e87..282238982d 100644
--- a/thirdparty/pcre2/src/pcre2_ucp.h
+++ b/thirdparty/pcre2/src/pcre2_ucp.h
@@ -7,7 +7,11 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
+Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
+a new version of this code.
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -38,31 +42,27 @@ POSSIBILITY OF SUCH DAMAGE.
 -----------------------------------------------------------------------------
 */
 
-
 #ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
 #define PCRE2_UCP_H_IDEMPOTENT_GUARD
 
-/* This file contains definitions of the property values that are returned by
-the UCD access macros. New values that are added for new releases of Unicode
-should always be at the end of each enum, for backwards compatibility.
+/* This file contains definitions of the Unicode property values that are
+returned by the UCD access macros and used throughout PCRE2.
 
-IMPORTANT: Note also that the specific numeric values of the enums have to be
-the same as the values that are generated by the maint/MultiStage2.py script,
-where the equivalent property descriptive names are listed in vectors.
-
-ALSO: The specific values of the first two enums are assumed for the table
-called catposstab in pcre2_compile.c. */
+IMPORTANT: The specific values of the first two enums (general and particular
+character categories) are assumed by the table called catposstab in the file
+pcre2_auto_possess.c. They are unlikely to change, but should be checked after
+an update. */
 
 /* These are the general character categories. */
 
 enum {
-  ucp_C,     /* Other */
-  ucp_L,     /* Letter */
-  ucp_M,     /* Mark */
-  ucp_N,     /* Number */
-  ucp_P,     /* Punctuation */
-  ucp_S,     /* Symbol */
-  ucp_Z      /* Separator */
+  ucp_C,
+  ucp_L,
+  ucp_M,
+  ucp_N,
+  ucp_P,
+  ucp_S,
+  ucp_Z,
 };
 
 /* These are the particular character categories. */
@@ -97,7 +97,98 @@ enum {
   ucp_So,    /* Other symbol */
   ucp_Zl,    /* Line separator */
   ucp_Zp,    /* Paragraph separator */
-  ucp_Zs     /* Space separator */
+  ucp_Zs,    /* Space separator */
+};
+
+/* These are Boolean properties. */
+
+enum {
+  ucp_ASCII,
+  ucp_ASCII_Hex_Digit,
+  ucp_Alphabetic,
+  ucp_Bidi_Control,
+  ucp_Bidi_Mirrored,
+  ucp_Case_Ignorable,
+  ucp_Cased,
+  ucp_Changes_When_Casefolded,
+  ucp_Changes_When_Casemapped,
+  ucp_Changes_When_Lowercased,
+  ucp_Changes_When_Titlecased,
+  ucp_Changes_When_Uppercased,
+  ucp_Dash,
+  ucp_Default_Ignorable_Code_Point,
+  ucp_Deprecated,
+  ucp_Diacritic,
+  ucp_Emoji,
+  ucp_Emoji_Component,
+  ucp_Emoji_Modifier,
+  ucp_Emoji_Modifier_Base,
+  ucp_Emoji_Presentation,
+  ucp_Extended_Pictographic,
+  ucp_Extender,
+  ucp_Grapheme_Base,
+  ucp_Grapheme_Extend,
+  ucp_Grapheme_Link,
+  ucp_Hex_Digit,
+  ucp_IDS_Binary_Operator,
+  ucp_IDS_Trinary_Operator,
+  ucp_ID_Continue,
+  ucp_ID_Start,
+  ucp_Ideographic,
+  ucp_Join_Control,
+  ucp_Logical_Order_Exception,
+  ucp_Lowercase,
+  ucp_Math,
+  ucp_Noncharacter_Code_Point,
+  ucp_Pattern_Syntax,
+  ucp_Pattern_White_Space,
+  ucp_Prepended_Concatenation_Mark,
+  ucp_Quotation_Mark,
+  ucp_Radical,
+  ucp_Regional_Indicator,
+  ucp_Sentence_Terminal,
+  ucp_Soft_Dotted,
+  ucp_Terminal_Punctuation,
+  ucp_Unified_Ideograph,
+  ucp_Uppercase,
+  ucp_Variation_Selector,
+  ucp_White_Space,
+  ucp_XID_Continue,
+  ucp_XID_Start,
+  /* This must be last */
+  ucp_Bprop_Count
+};
+
+/* Size of entries in ucd_boolprop_sets[] */
+
+#define ucd_boolprop_sets_item_size 2
+
+/* These are the bidi class values. */
+
+enum {
+  ucp_bidiAL,   /* Arabic letter */
+  ucp_bidiAN,   /* Arabic number */
+  ucp_bidiB,    /* Paragraph separator */
+  ucp_bidiBN,   /* Boundary neutral */
+  ucp_bidiCS,   /* Common separator */
+  ucp_bidiEN,   /* European number */
+  ucp_bidiES,   /* European separator */
+  ucp_bidiET,   /* European terminator */
+  ucp_bidiFSI,  /* First strong isolate */
+  ucp_bidiL,    /* Left to right */
+  ucp_bidiLRE,  /* Left to right embedding */
+  ucp_bidiLRI,  /* Left to right isolate */
+  ucp_bidiLRO,  /* Left to right override */
+  ucp_bidiNSM,  /* Non-spacing mark */
+  ucp_bidiON,   /* Other neutral */
+  ucp_bidiPDF,  /* Pop directional format */
+  ucp_bidiPDI,  /* Pop directional isolate */
+  ucp_bidiR,    /* Right to left */
+  ucp_bidiRLE,  /* Right to left embedding */
+  ucp_bidiRLI,  /* Right to left isolate */
+  ucp_bidiRLO,  /* Right to left override */
+  ucp_bidiS,    /* Segment separator */
+  ucp_bidiWS,   /* White space */
 };
 
 /* These are grapheme break properties. The Extended Pictographic property
@@ -115,191 +206,189 @@ enum {
   ucp_gbT,                     /*  8 Hangul syllable type T */
   ucp_gbLV,                    /*  9 Hangul syllable type LV */
   ucp_gbLVT,                   /* 10 Hangul syllable type LVT */
-  ucp_gbRegionalIndicator,     /* 11 */
+  ucp_gbRegional_Indicator,    /* 11 */
   ucp_gbOther,                 /* 12 */
   ucp_gbZWJ,                   /* 13 */
-  ucp_gbExtended_Pictographic  /* 14 */
+  ucp_gbExtended_Pictographic, /* 14 */
 };
 
 /* These are the script identifications. */
 
 enum {
-  ucp_Unknown,
-  ucp_Arabic,
-  ucp_Armenian,
-  ucp_Bengali,
-  ucp_Bopomofo,
-  ucp_Braille,
-  ucp_Buginese,
-  ucp_Buhid,
-  ucp_Canadian_Aboriginal,
-  ucp_Cherokee,
-  ucp_Common,
-  ucp_Coptic,
-  ucp_Cypriot,
+  /* Scripts which has characters in other scripts. */
+  ucp_Latin,
+  ucp_Greek,
   ucp_Cyrillic,
-  ucp_Deseret,
+  ucp_Arabic,
+  ucp_Syriac,
+  ucp_Thaana,
   ucp_Devanagari,
-  ucp_Ethiopic,
-  ucp_Georgian,
-  ucp_Glagolitic,
-  ucp_Gothic,
-  ucp_Greek,
-  ucp_Gujarati,
+  ucp_Bengali,
   ucp_Gurmukhi,
-  ucp_Han,
-  ucp_Hangul,
-  ucp_Hanunoo,
-  ucp_Hebrew,
-  ucp_Hiragana,
-  ucp_Inherited,
+  ucp_Gujarati,
+  ucp_Oriya,
+  ucp_Tamil,
+  ucp_Telugu,
   ucp_Kannada,
-  ucp_Katakana,
-  ucp_Kharoshthi,
-  ucp_Khmer,
-  ucp_Lao,
-  ucp_Latin,
-  ucp_Limbu,
-  ucp_Linear_B,
   ucp_Malayalam,
-  ucp_Mongolian,
-  ucp_Myanmar,
-  ucp_New_Tai_Lue,
-  ucp_Ogham,
-  ucp_Old_Italic,
-  ucp_Old_Persian,
-  ucp_Oriya,
-  ucp_Osmanya,
-  ucp_Runic,
-  ucp_Shavian,
   ucp_Sinhala,
-  ucp_Syloti_Nagri,
-  ucp_Syriac,
+  ucp_Myanmar,
+  ucp_Georgian,
+  ucp_Hangul,
+  ucp_Mongolian,
+  ucp_Hiragana,
+  ucp_Katakana,
+  ucp_Bopomofo,
+  ucp_Han,
+  ucp_Yi,
   ucp_Tagalog,
+  ucp_Hanunoo,
+  ucp_Buhid,
   ucp_Tagbanwa,
+  ucp_Limbu,
   ucp_Tai_Le,
-  ucp_Tamil,
-  ucp_Telugu,
-  ucp_Thaana,
+  ucp_Linear_B,
+  ucp_Cypriot,
+  ucp_Buginese,
+  ucp_Coptic,
+  ucp_Glagolitic,
+  ucp_Syloti_Nagri,
+  ucp_Phags_Pa,
+  ucp_Nko,
+  ucp_Kayah_Li,
+  ucp_Javanese,
+  ucp_Kaithi,
+  ucp_Mandaic,
+  ucp_Chakma,
+  ucp_Sharada,
+  ucp_Takri,
+  ucp_Duployan,
+  ucp_Grantha,
+  ucp_Khojki,
+  ucp_Linear_A,
+  ucp_Mahajani,
+  ucp_Manichaean,
+  ucp_Modi,
+  ucp_Old_Permic,
+  ucp_Psalter_Pahlavi,
+  ucp_Khudawadi,
+  ucp_Tirhuta,
+  ucp_Multani,
+  ucp_Adlam,
+  ucp_Masaram_Gondi,
+  ucp_Dogra,
+  ucp_Gunjala_Gondi,
+  ucp_Hanifi_Rohingya,
+  ucp_Sogdian,
+  ucp_Nandinagari,
+  ucp_Yezidi,
+  ucp_Cypro_Minoan,
+  ucp_Old_Uyghur,
+
+  /* Scripts which has no characters in other scripts. */
+  ucp_Unknown,
+  ucp_Common,
+  ucp_Armenian,
+  ucp_Hebrew,
   ucp_Thai,
+  ucp_Lao,
   ucp_Tibetan,
-  ucp_Tifinagh,
+  ucp_Ethiopic,
+  ucp_Cherokee,
+  ucp_Canadian_Aboriginal,
+  ucp_Ogham,
+  ucp_Runic,
+  ucp_Khmer,
+  ucp_Old_Italic,
+  ucp_Gothic,
+  ucp_Deseret,
+  ucp_Inherited,
   ucp_Ugaritic,
-  ucp_Yi,
-  /* New for Unicode 5.0 */
+  ucp_Shavian,
+  ucp_Osmanya,
+  ucp_Braille,
+  ucp_New_Tai_Lue,
+  ucp_Tifinagh,
+  ucp_Old_Persian,
+  ucp_Kharoshthi,
   ucp_Balinese,
   ucp_Cuneiform,
-  ucp_Nko,
-  ucp_Phags_Pa,
   ucp_Phoenician,
-  /* New for Unicode 5.1 */
-  ucp_Carian,
-  ucp_Cham,
-  ucp_Kayah_Li,
+  ucp_Sundanese,
   ucp_Lepcha,
-  ucp_Lycian,
-  ucp_Lydian,
   ucp_Ol_Chiki,
-  ucp_Rejang,
-  ucp_Saurashtra,
-  ucp_Sundanese,
   ucp_Vai,
-  /* New for Unicode 5.2 */
+  ucp_Saurashtra,
+  ucp_Rejang,
+  ucp_Lycian,
+  ucp_Carian,
+  ucp_Lydian,
+  ucp_Cham,
+  ucp_Tai_Tham,
+  ucp_Tai_Viet,
   ucp_Avestan,
-  ucp_Bamum,
   ucp_Egyptian_Hieroglyphs,
-  ucp_Imperial_Aramaic,
-  ucp_Inscriptional_Pahlavi,
-  ucp_Inscriptional_Parthian,
-  ucp_Javanese,
-  ucp_Kaithi,
+  ucp_Samaritan,
   ucp_Lisu,
+  ucp_Bamum,
   ucp_Meetei_Mayek,
+  ucp_Imperial_Aramaic,
   ucp_Old_South_Arabian,
+  ucp_Inscriptional_Parthian,
+  ucp_Inscriptional_Pahlavi,
   ucp_Old_Turkic,
-  ucp_Samaritan,
-  ucp_Tai_Tham,
-  ucp_Tai_Viet,
-  /* New for Unicode 6.0.0 */
   ucp_Batak,
   ucp_Brahmi,
-  ucp_Mandaic,
-  /* New for Unicode 6.1.0 */
-  ucp_Chakma,
   ucp_Meroitic_Cursive,
   ucp_Meroitic_Hieroglyphs,
   ucp_Miao,
-  ucp_Sharada,
   ucp_Sora_Sompeng,
-  ucp_Takri,
-  /* New for Unicode 7.0.0 */
-  ucp_Bassa_Vah,
   ucp_Caucasian_Albanian,
-  ucp_Duployan,
+  ucp_Bassa_Vah,
   ucp_Elbasan,
-  ucp_Grantha,
-  ucp_Khojki,
-  ucp_Khudawadi,
-  ucp_Linear_A,
-  ucp_Mahajani,
-  ucp_Manichaean,
+  ucp_Pahawh_Hmong,
   ucp_Mende_Kikakui,
-  ucp_Modi,
   ucp_Mro,
-  ucp_Nabataean,
   ucp_Old_North_Arabian,
-  ucp_Old_Permic,
-  ucp_Pahawh_Hmong,
+  ucp_Nabataean,
   ucp_Palmyrene,
-  ucp_Psalter_Pahlavi,
   ucp_Pau_Cin_Hau,
   ucp_Siddham,
-  ucp_Tirhuta,
   ucp_Warang_Citi,
-  /* New for Unicode 8.0.0 */
   ucp_Ahom,
   ucp_Anatolian_Hieroglyphs,
   ucp_Hatran,
-  ucp_Multani,
   ucp_Old_Hungarian,
   ucp_SignWriting,
-  /* New for Unicode 10.0.0 (no update since 8.0.0) */
-  ucp_Adlam,
   ucp_Bhaiksuki,
   ucp_Marchen,
   ucp_Newa,
   ucp_Osage,
   ucp_Tangut,
-  ucp_Masaram_Gondi,
   ucp_Nushu,
   ucp_Soyombo,
   ucp_Zanabazar_Square,
-  /* New for Unicode 11.0.0 */
-  ucp_Dogra,
-  ucp_Gunjala_Gondi,
-  ucp_Hanifi_Rohingya,
   ucp_Makasar,
   ucp_Medefaidrin,
   ucp_Old_Sogdian,
-  ucp_Sogdian,
-  /* New for Unicode 12.0.0 */
   ucp_Elymaic,
-  ucp_Nandinagari,
   ucp_Nyiakeng_Puachue_Hmong,
   ucp_Wancho,
-  /* New for Unicode 13.0.0 */
   ucp_Chorasmian,
   ucp_Dives_Akuru,
   ucp_Khitan_Small_Script,
-  ucp_Yezidi,
-  /* New for Unicode 14.0.0 */
-  ucp_Cypro_Minoan,
-  ucp_Old_Uyghur,
   ucp_Tangsa,
   ucp_Toto,
-  ucp_Vithkuqi
+  ucp_Vithkuqi,
+
+  /* This must be last */
+  ucp_Script_Count
 };
 
+/* Size of entries in ucd_script_sets[] */
+
+#define ucd_script_sets_item_size 3
+
 #endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
 
 /* End of pcre2_ucp.h */