summaryrefslogtreecommitdiff
path: root/thirdparty/icu4c/common/uniset_props.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/icu4c/common/uniset_props.cpp')
-rw-r--r--thirdparty/icu4c/common/uniset_props.cpp116
1 files changed, 42 insertions, 74 deletions
diff --git a/thirdparty/icu4c/common/uniset_props.cpp b/thirdparty/icu4c/common/uniset_props.cpp
index 37277fcb75..8fde5abcdd 100644
--- a/thirdparty/icu4c/common/uniset_props.cpp
+++ b/thirdparty/icu4c/common/uniset_props.cpp
@@ -47,31 +47,6 @@
U_NAMESPACE_USE
-// Define UChar constants using hex for EBCDIC compatibility
-// Used #define to reduce private static exports and memory access time.
-#define SET_OPEN ((UChar)0x005B) /*[*/
-#define SET_CLOSE ((UChar)0x005D) /*]*/
-#define HYPHEN ((UChar)0x002D) /*-*/
-#define COMPLEMENT ((UChar)0x005E) /*^*/
-#define COLON ((UChar)0x003A) /*:*/
-#define BACKSLASH ((UChar)0x005C) /*\*/
-#define INTERSECTION ((UChar)0x0026) /*&*/
-#define UPPER_U ((UChar)0x0055) /*U*/
-#define LOWER_U ((UChar)0x0075) /*u*/
-#define OPEN_BRACE ((UChar)123) /*{*/
-#define CLOSE_BRACE ((UChar)125) /*}*/
-#define UPPER_P ((UChar)0x0050) /*P*/
-#define LOWER_P ((UChar)0x0070) /*p*/
-#define UPPER_N ((UChar)78) /*N*/
-#define EQUALS ((UChar)0x003D) /*=*/
-
-//static const UChar POSIX_OPEN[] = { SET_OPEN,COLON,0 }; // "[:"
-static const UChar POSIX_CLOSE[] = { COLON,SET_CLOSE,0 }; // ":]"
-//static const UChar PERL_OPEN[] = { BACKSLASH,LOWER_P,0 }; // "\\p"
-//static const UChar PERL_CLOSE[] = { CLOSE_BRACE,0 }; // "}"
-//static const UChar NAME_OPEN[] = { BACKSLASH,UPPER_N,0 }; // "\\N"
-static const UChar HYPHEN_RIGHT_BRACE[] = {HYPHEN,SET_CLOSE,0}; /*-]*/
-
// Special property set IDs
static const char ANY[] = "ANY"; // [\u0000-\U0010FFFF]
static const char ASCII[] = "ASCII"; // [\u0000-\u007F]
@@ -81,12 +56,6 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
#define NAME_PROP "na"
#define NAME_PROP_LENGTH 2
-/**
- * Delimiter string used in patterns to close a category reference:
- * ":]". Example: "[:Lu:]".
- */
-//static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */
-
// Cached sets ------------------------------------------------------------- ***
U_CDECL_BEGIN
@@ -140,27 +109,27 @@ uniset_getUnicode32Instance(UErrorCode &errorCode) {
static inline UBool
isPerlOpen(const UnicodeString &pattern, int32_t pos) {
UChar c;
- return pattern.charAt(pos)==BACKSLASH && ((c=pattern.charAt(pos+1))==LOWER_P || c==UPPER_P);
+ return pattern.charAt(pos)==u'\\' && ((c=pattern.charAt(pos+1))==u'p' || c==u'P');
}
/*static inline UBool
isPerlClose(const UnicodeString &pattern, int32_t pos) {
- return pattern.charAt(pos)==CLOSE_BRACE;
+ return pattern.charAt(pos)==u'}';
}*/
static inline UBool
isNameOpen(const UnicodeString &pattern, int32_t pos) {
- return pattern.charAt(pos)==BACKSLASH && pattern.charAt(pos+1)==UPPER_N;
+ return pattern.charAt(pos)==u'\\' && pattern.charAt(pos+1)==u'N';
}
static inline UBool
isPOSIXOpen(const UnicodeString &pattern, int32_t pos) {
- return pattern.charAt(pos)==SET_OPEN && pattern.charAt(pos+1)==COLON;
+ return pattern.charAt(pos)==u'[' && pattern.charAt(pos+1)==u':';
}
/*static inline UBool
isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
- return pattern.charAt(pos)==COLON && pattern.charAt(pos+1)==SET_CLOSE;
+ return pattern.charAt(pos)==u':' && pattern.charAt(pos+1)==u']';
}*/
// TODO memory debugging provided inside uniset.cpp
@@ -326,9 +295,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
while (mode != 2 && !chars.atEnd()) {
U_ASSERT((lastItem == 0 && op == 0) ||
- (lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
- (lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
- op == INTERSECTION /*'&'*/)));
+ (lastItem == 1 && (op == 0 || op == u'-')) ||
+ (lastItem == 2 && (op == 0 || op == u'-' || op == u'&')));
UChar32 c = 0;
UBool literal = FALSE;
@@ -356,27 +324,27 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
- if (c == 0x5B /*'['*/ && !literal) {
+ if (c == u'[' && !literal) {
if (mode == 1) {
chars.setPos(backup); // backup
setMode = 1;
} else {
// Handle opening '[' delimiter
mode = 1;
- patLocal.append((UChar) 0x5B /*'['*/);
+ patLocal.append(u'[');
chars.getPos(backup); // prepare to backup
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
- if (c == 0x5E /*'^'*/ && !literal) {
+ if (c == u'^' && !literal) {
invert = TRUE;
- patLocal.append((UChar) 0x5E /*'^'*/);
+ patLocal.append(u'^');
chars.getPos(backup); // prepare to backup
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
}
// Fall through to handle special leading '-';
// otherwise restart loop for nested [], \p{}, etc.
- if (c == HYPHEN /*'-'*/) {
+ if (c == u'-') {
literal = TRUE;
// Fall through to handle literal '-' below
} else {
@@ -418,7 +386,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
op = 0;
}
- if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) {
+ if (op == u'-' || op == u'&') {
patLocal.append(op);
}
@@ -454,10 +422,10 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
}
switch (op) {
- case HYPHEN: /*'-'*/
+ case u'-':
removeAll(*nested);
break;
- case INTERSECTION: /*'&'*/
+ case u'&':
retainAll(*nested);
break;
case 0:
@@ -483,24 +451,24 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
if (!literal) {
switch (c) {
- case 0x5D /*']'*/:
+ case u']':
if (lastItem == 1) {
add(lastChar, lastChar);
_appendToPat(patLocal, lastChar, FALSE);
}
// Treat final trailing '-' as a literal
- if (op == HYPHEN /*'-'*/) {
+ if (op == u'-') {
add(op, op);
patLocal.append(op);
- } else if (op == INTERSECTION /*'&'*/) {
+ } else if (op == u'&') {
// syntaxError(chars, "Trailing '&'");
ec = U_MALFORMED_SET;
return;
}
- patLocal.append((UChar) 0x5D /*']'*/);
+ patLocal.append(u']');
mode = 2;
continue;
- case HYPHEN /*'-'*/:
+ case u'-':
if (op == 0) {
if (lastItem != 0) {
op = (UChar) c;
@@ -510,8 +478,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
add(c, c);
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
- if (c == 0x5D /*']'*/ && !literal) {
- patLocal.append(HYPHEN_RIGHT_BRACE, 2);
+ if (c == u']' && !literal) {
+ patLocal.append(u"-]", 2);
mode = 2;
continue;
}
@@ -520,7 +488,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
// syntaxError(chars, "'-' not after char or set");
ec = U_MALFORMED_SET;
return;
- case INTERSECTION /*'&'*/:
+ case u'&':
if (lastItem == 2 && op == 0) {
op = (UChar) c;
continue;
@@ -528,11 +496,11 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
// syntaxError(chars, "'&' not after set");
ec = U_MALFORMED_SET;
return;
- case 0x5E /*'^'*/:
+ case u'^':
// syntaxError(chars, "'^' not after '['");
ec = U_MALFORMED_SET;
return;
- case 0x7B /*'{'*/:
+ case u'{':
if (op != 0) {
// syntaxError(chars, "Missing operand after operator");
ec = U_MALFORMED_SET;
@@ -549,13 +517,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
while (!chars.atEnd()) {
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
- if (c == 0x7D /*'}'*/ && !literal) {
+ if (c == u'}' && !literal) {
ok = TRUE;
break;
}
buf.append(c);
}
- if (buf.length() < 1 || !ok) {
+ if (!ok) {
// syntaxError(chars, "Invalid multicharacter string");
ec = U_MALFORMED_SET;
return;
@@ -565,9 +533,9 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
// we don't need to drop through to the further
// processing
add(buf);
- patLocal.append((UChar) 0x7B /*'{'*/);
+ patLocal.append(u'{');
_appendToPat(patLocal, buf, FALSE);
- patLocal.append((UChar) 0x7D /*'}'*/);
+ patLocal.append(u'}');
continue;
case SymbolTable::SYMBOL_REF:
// symbols nosymbols
@@ -580,7 +548,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
chars.getPos(backup);
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
- UBool anchor = (c == 0x5D /*']'*/ && !literal);
+ UBool anchor = (c == u']' && !literal);
if (symbols == 0 && !anchor) {
c = SymbolTable::SYMBOL_REF;
chars.setPos(backup);
@@ -594,7 +562,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
add(U_ETHER);
usePat = TRUE;
patLocal.append((UChar) SymbolTable::SYMBOL_REF);
- patLocal.append((UChar) 0x5D /*']'*/);
+ patLocal.append(u']');
mode = 2;
continue;
}
@@ -617,7 +585,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
lastChar = c;
break;
case 1:
- if (op == HYPHEN /*'-'*/) {
+ if (op == u'-') {
if (lastChar >= c) {
// Don't allow redundant (a-a) or empty (b-a) ranges;
// these are most likely typos.
@@ -1036,11 +1004,11 @@ UBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars,
RuleCharacterIterator::Pos pos;
chars.getPos(pos);
UChar32 c = chars.next(iterOpts, literal, ec);
- if (c == 0x5B /*'['*/ || c == 0x5C /*'\\'*/) {
+ if (c == u'[' || c == u'\\') {
UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE,
literal, ec);
- result = (c == 0x5B /*'['*/) ? (d == 0x3A /*':'*/) :
- (d == 0x4E /*'N'*/ || d == 0x70 /*'p'*/ || d == 0x50 /*'P'*/);
+ result = (c == u'[') ? (d == u':') :
+ (d == u'N' || d == u'p' || d == u'P');
}
chars.setPos(pos);
return result && U_SUCCESS(ec);
@@ -1071,17 +1039,17 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
posix = TRUE;
pos += 2;
pos = ICU_Utility::skipWhitespace(pattern, pos);
- if (pos < pattern.length() && pattern.charAt(pos) == COMPLEMENT) {
+ if (pos < pattern.length() && pattern.charAt(pos) == u'^') {
++pos;
invert = TRUE;
}
} else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) {
UChar c = pattern.charAt(pos+1);
- invert = (c == UPPER_P);
- isName = (c == UPPER_N);
+ invert = (c == u'P');
+ isName = (c == u'N');
pos += 2;
pos = ICU_Utility::skipWhitespace(pattern, pos);
- if (pos == pattern.length() || pattern.charAt(pos++) != OPEN_BRACE) {
+ if (pos == pattern.length() || pattern.charAt(pos++) != u'{') {
// Syntax error; "\p" or "\P" not followed by "{"
FAIL(ec);
}
@@ -1093,9 +1061,9 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
// Look for the matching close delimiter, either :] or }
int32_t close;
if (posix) {
- close = pattern.indexOf(POSIX_CLOSE, 2, pos);
+ close = pattern.indexOf(u":]", 2, pos);
} else {
- close = pattern.indexOf(CLOSE_BRACE, pos);
+ close = pattern.indexOf(u'}', pos);
}
if (close < 0) {
// Syntax error; close delimiter missing
@@ -1105,7 +1073,7 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
// Look for an '=' sign. If this is present, we will parse a
// medium \p{gc=Cf} or long \p{GeneralCategory=Format}
// pattern.
- int32_t equals = pattern.indexOf(EQUALS, pos);
+ int32_t equals = pattern.indexOf(u'=', pos);
UnicodeString propName, valueName;
if (equals >= 0 && equals < close && !isName) {
// Equals seen; parse medium/long pattern