summaryrefslogtreecommitdiff
path: root/thirdparty/icu4c/common/unisetspan.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/icu4c/common/unisetspan.cpp')
-rw-r--r--thirdparty/icu4c/common/unisetspan.cpp25
1 files changed, 19 insertions, 6 deletions
diff --git a/thirdparty/icu4c/common/unisetspan.cpp b/thirdparty/icu4c/common/unisetspan.cpp
index 68e44d91ee..fe0d74f5b2 100644
--- a/thirdparty/icu4c/common/unisetspan.cpp
+++ b/thirdparty/icu4c/common/unisetspan.cpp
@@ -231,6 +231,9 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
+ if (length16==0) {
+ continue; // skip the empty string
+ }
UBool thisRelevant;
spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
if(spanLength<length16) { // Relevant string.
@@ -312,7 +315,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
- if(spanLength<length16) { // Relevant string.
+ if(spanLength<length16 && length16>0) { // Relevant string.
if(which&UTF16) {
if(which&CONTAINED) {
if(which&FWD) {
@@ -362,7 +365,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
addToSpanNotSet(c);
}
}
- } else { // Irrelevant string.
+ } else { // Irrelevant string. (Also the empty string.)
if(which&UTF8) {
if(which&CONTAINED) { // Only necessary for LONGEST_MATCH.
uint8_t *s8=utf8+utf8Count;
@@ -653,11 +656,12 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi
for(i=0; i<stringsLength; ++i) {
int32_t overlap=spanLengths[i];
if(overlap==ALL_CP_CONTAINED) {
- continue; // Irrelevant string.
+ continue; // Irrelevant string. (Also the empty string.)
}
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
+ U_ASSERT(length>0);
// Try to match this string at pos-overlap..pos.
if(overlap>=LONG_SPAN) {
@@ -697,6 +701,9 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
+ if (length16==0) {
+ continue; // skip the empty string
+ }
// Try to match this string at pos-overlap..pos.
if(overlap>=LONG_SPAN) {
@@ -817,11 +824,12 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC
for(i=0; i<stringsLength; ++i) {
int32_t overlap=spanBackLengths[i];
if(overlap==ALL_CP_CONTAINED) {
- continue; // Irrelevant string.
+ continue; // Irrelevant string. (Also the empty string.)
}
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
+ U_ASSERT(length>0);
// Try to match this string at pos-(length16-overlap)..pos-length16.
if(overlap>=LONG_SPAN) {
@@ -863,6 +871,9 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
+ if (length16==0) {
+ continue; // skip the empty string
+ }
// Try to match this string at pos-(length16-overlap)..pos-length16.
if(overlap>=LONG_SPAN) {
@@ -1358,11 +1369,12 @@ int32_t UnicodeSetStringSpan::spanNot(const UChar *s, int32_t length) const {
// Try to match the strings at pos.
for(i=0; i<stringsLength; ++i) {
if(spanLengths[i]==ALL_CP_CONTAINED) {
- continue; // Irrelevant string.
+ continue; // Irrelevant string. (Also the empty string.)
}
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
+ U_ASSERT(length>0);
if(length16<=rest && matches16CPB(s, pos, length, s16, length16)) {
return pos; // There is a set element at pos.
}
@@ -1401,11 +1413,12 @@ int32_t UnicodeSetStringSpan::spanNotBack(const UChar *s, int32_t length) const
// it is easier and we only need to know whether the string is irrelevant
// which is the same in either array.
if(spanLengths[i]==ALL_CP_CONTAINED) {
- continue; // Irrelevant string.
+ continue; // Irrelevant string. (Also the empty string.)
}
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
+ U_ASSERT(length>0);
if(length16<=pos && matches16CPB(s, pos-length16, length, s16, length16)) {
return pos; // There is a set element at pos.
}