diff options
Diffstat (limited to 'thirdparty/icu4c/common/unicode/uniset.h')
-rw-r--r-- | thirdparty/icu4c/common/unicode/uniset.h | 75 |
1 files changed, 53 insertions, 22 deletions
diff --git a/thirdparty/icu4c/common/unicode/uniset.h b/thirdparty/icu4c/common/unicode/uniset.h index 8403c4026c..730337a353 100644 --- a/thirdparty/icu4c/common/unicode/uniset.h +++ b/thirdparty/icu4c/common/unicode/uniset.h @@ -124,8 +124,8 @@ class RuleCharacterIterator; * "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized. For a * complete list of supported property patterns, see the User's Guide * for UnicodeSet at - * <a href="http://icu-project.org/userguide/unicodeSet.html"> - * http://icu-project.org/userguide/unicodeSet.html</a>. + * <a href="https://unicode-org.github.io/icu/userguide/strings/unicodeset"> + * https://unicode-org.github.io/icu/userguide/strings/unicodeset</a>. * Actual determination of property data is defined by the underlying * Unicode database as implemented by UCharacter. * @@ -136,6 +136,13 @@ class RuleCharacterIterator; * their delimiters; "[:^foo]" and "\\P{foo}". In any other location, * '^' has no special meaning. * + * <p>Since ICU 70, "[^...]", "[:^foo]", "\\P{foo}", and "[:binaryProperty=No:]" + * perform a “code point complement” (all code points minus the original set), + * removing all multicharacter strings, + * equivalent to <code>.complement().removeAllStrings()</code>. + * The complement() API function continues to perform a + * symmetric difference with all code points and thus retains all multicharacter strings. + * * <p>Ranges are indicated by placing two a '-' between two * characters, as in "a-z". This specifies the range of all * characters from the left to the right, in Unicode order. If the @@ -217,9 +224,8 @@ class RuleCharacterIterator; * </tr> * <tr align="top"> * <td nowrap valign="top" align="right"><code>hex := </code></td> - * <td valign="top"><em>any character for which - * </em><code>Character.digit(c, 16)</code><em> - * returns a non-negative result</em></td> + * <td valign="top"><code>'0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' |<br> + * 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f'</code></td> * </tr> * <tr> * <td nowrap valign="top" align="right"><code>property := </code></td> @@ -485,14 +491,14 @@ public: * @return <tt>true</tt> if the specified set is equal to this set. * @stable ICU 2.0 */ - virtual UBool operator==(const UnicodeSet& o) const; + virtual bool operator==(const UnicodeSet& o) const; /** * Compares the specified object with this set for equality. Returns * <tt>true</tt> if the specified set is not equal to this set. * @stable ICU 2.0 */ - inline UBool operator!=(const UnicodeSet& o) const; + inline bool operator!=(const UnicodeSet& o) const; /** * Returns a copy of this object. All UnicodeFunctor objects have @@ -503,7 +509,7 @@ public: * @see cloneAsThawed * @stable ICU 2.0 */ - virtual UnicodeSet* clone() const; + virtual UnicodeSet* clone() const override; /** * Returns the hash code value for this set. @@ -705,7 +711,7 @@ public: * @stable ICU 2.0 */ virtual UnicodeString& toPattern(UnicodeString& result, - UBool escapeUnprintable = false) const; + UBool escapeUnprintable = false) const override; /** * Modifies this set to contain those code points which have the given value @@ -771,8 +777,12 @@ public: * Note than the elements of a set may include both individual * codepoints and strings. * + * This is slower than getRangeCount() because + * it counts the code points of all ranges. + * * @return the number of elements in this set (its cardinality). * @stable ICU 2.0 + * @see getRangeCount */ virtual int32_t size(void) const; @@ -784,6 +794,14 @@ public: */ virtual UBool isEmpty(void) const; +#ifndef U_HIDE_DRAFT_API + /** + * @return true if this set contains multi-character strings or the empty string. + * @draft ICU 70 + */ + UBool hasStrings() const; +#endif // U_HIDE_DRAFT_API + /** * Returns true if this set contains the given character. * This function works faster with a frozen set. @@ -791,7 +809,7 @@ public: * @return true if the test condition is met * @stable ICU 2.0 */ - virtual UBool contains(UChar32 c) const; + virtual UBool contains(UChar32 c) const override; /** * Returns true if this set contains every character @@ -1000,7 +1018,7 @@ public: virtual UMatchDegree matches(const Replaceable& text, int32_t& offset, int32_t limit, - UBool incremental); + UBool incremental) override; private: /** @@ -1049,7 +1067,7 @@ public: * @param toUnionTo the set into which to union the source characters * @stable ICU 2.4 */ - virtual void addMatchSetTo(UnicodeSet& toUnionTo) const; + virtual void addMatchSetTo(UnicodeSet& toUnionTo) const override; /** * Returns the index of the given character within this set, where @@ -1064,8 +1082,14 @@ public: /** * Returns the character at the given index within this set, where * the set is ordered by ascending code point. If the index is - * out of range, return (UChar32)-1. The inverse of this method is - * <code>indexOf()</code>. + * out of range for characters, returns (UChar32)-1. + * The inverse of this method is <code>indexOf()</code>. + * + * For iteration, this is slower than UnicodeSetIterator or + * getRangeCount()/getRangeStart()/getRangeEnd(), + * because for each call it skips linearly over <code>index</code> + * characters in the ranges. + * * @param index an index from 0..size()-1 * @return the character at the given index, or (UChar32)-1. * @stable ICU 2.4 @@ -1258,13 +1282,18 @@ public: UnicodeSet& remove(const UnicodeString& s); /** - * Inverts this set. This operation modifies this set so that - * its value is its complement. This is equivalent to + * This is equivalent to * <code>complement(MIN_VALUE, MAX_VALUE)</code>. + * + * <strong>Note:</strong> This performs a symmetric difference with all code points + * <em>and thus retains all multicharacter strings</em>. + * In order to achieve a “code point complement” (all code points minus this set), + * the easiest is to <code>.complement().removeAllStrings()</code>. + * * A frozen set will not be modified. * @stable ICU 2.0 */ - virtual UnicodeSet& complement(void); + virtual UnicodeSet& complement(); /** * Complements the specified range in this set. Any character in @@ -1504,7 +1533,7 @@ public: * different class IDs. * @stable ICU 2.4 */ - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; private: @@ -1525,7 +1554,7 @@ private: * is the given value. This is used by <tt>RuleBasedTransliterator</tt> for * indexing. */ - virtual UBool matchesIndexValue(uint8_t v) const; + virtual UBool matchesIndexValue(uint8_t v) const override; private: friend class RBBIRuleScanner; @@ -1567,7 +1596,6 @@ private: void swapBuffers(void); UBool allocateStrings(UErrorCode &status); - UBool hasStrings() const; int32_t stringsSize() const; UBool stringsContains(const UnicodeString &s) const; @@ -1581,6 +1609,9 @@ private: static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable); + static void _appendToPat(UnicodeString &result, UChar32 start, UChar32 end, + UBool escapeUnprintable); + //---------------------------------------------------------------- // Implementation: Fundamental operators //---------------------------------------------------------------- @@ -1608,7 +1639,7 @@ private: * * The original design document is out of date, but still useful. * Ignore the property and value names: - * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/unicodeset_properties.html + * https://htmlpreview.github.io/?https://github.com/unicode-org/icu-docs/blob/main/design/unicodeset_properties.html * * Recognized syntax: * @@ -1693,7 +1724,7 @@ private: -inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const { +inline bool UnicodeSet::operator!=(const UnicodeSet& o) const { return !operator==(o); } |