From 5383ae005cab1deafc1d822b473cb2b73df6f8df Mon Sep 17 00:00:00 2001 From: firefly2442 Date: Thu, 24 May 2018 00:13:24 -0600 Subject: update PCRE2 to version 10.31, fixes #15662 --- thirdparty/pcre2/src/pcre2_tables.c | 448 ++++++++++++++++++++---------------- 1 file changed, 248 insertions(+), 200 deletions(-) (limited to 'thirdparty/pcre2/src/pcre2_tables.c') diff --git a/thirdparty/pcre2/src/pcre2_tables.c b/thirdparty/pcre2/src/pcre2_tables.c index b945ed7a7f..9f8dc293aa 100644 --- a/thirdparty/pcre2/src/pcre2_tables.c +++ b/thirdparty/pcre2/src/pcre2_tables.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2016-2017 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -39,7 +39,7 @@ POSSIBILITY OF SUCH DAMAGE. */ /* This module contains some fixed tables that are used by more than one of the -PCRE code modules. The tables are also #included by the pcre2test program, +PCRE2 code modules. The tables are also #included by the pcre2test program, which uses macros to change their names from _pcre2_xxx to xxxx, thereby avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is defined. */ @@ -148,7 +148,7 @@ two code points. The breaking rules are as follows: 1. Break at the start and end of text (pretty obviously). -2. Do not break between a CR and LF; otherwise, break before and after +2. Do not break between a CR and LF; otherwise, break before and after controls. 3. Do not break Hangul syllable sequences, the rules for which are: @@ -157,44 +157,62 @@ two code points. The breaking rules are as follows: LV or V may be followed by V or T LVT or T may be followed by T -4. Do not break before extending characters. +4. 
Do not break before extending characters or zero-width-joiner (ZWJ). -The next two rules are only for extended grapheme clusters (but that's what we +The following rules are only for extended grapheme clusters (but that's what we are implementing). 5. Do not break before SpacingMarks. 6. Do not break after Prepend characters. -7. Otherwise, break everywhere. +7. Do not break within emoji modifier sequences (E_Base or E_Base_GAZ followed + by E_Modifier). Extend characters are allowed before the modifier; this + cannot be represented in this table, so the code has to deal with it. + +8. Do not break within emoji ZWJ sequences (ZWJ followed by Glue_After_Zwj or + E_Base_GAZ). + +9. Do not break within emoji flag sequences. That is, do not break between + regional indicator (RI) symbols if there is an odd number of RI characters + before the break point. This table encodes "join RI characters"; the code + has to deal with checking for previous adjoining RIs. + +10. Otherwise, break everywhere. */ +#define ESZ (1<