diff options
Diffstat (limited to 'thirdparty/icu4c/common/unisetspan.cpp')
-rw-r--r-- | thirdparty/icu4c/common/unisetspan.cpp | 25 |
1 files changed, 19 insertions, 6 deletions
diff --git a/thirdparty/icu4c/common/unisetspan.cpp b/thirdparty/icu4c/common/unisetspan.cpp index 68e44d91ee..fe0d74f5b2 100644 --- a/thirdparty/icu4c/common/unisetspan.cpp +++ b/thirdparty/icu4c/common/unisetspan.cpp @@ -231,6 +231,9 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set, const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); const UChar *s16=string.getBuffer(); int32_t length16=string.length(); + if (length16==0) { + continue; // skip the empty string + } UBool thisRelevant; spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED); if(spanLength<length16) { // Relevant string. @@ -312,7 +315,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set, const UChar *s16=string.getBuffer(); int32_t length16=string.length(); spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED); - if(spanLength<length16) { // Relevant string. + if(spanLength<length16 && length16>0) { // Relevant string. if(which&UTF16) { if(which&CONTAINED) { if(which&FWD) { @@ -362,7 +365,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set, addToSpanNotSet(c); } } - } else { // Irrelevant string. + } else { // Irrelevant string. (Also the empty string.) if(which&UTF8) { if(which&CONTAINED) { // Only necessary for LONGEST_MATCH. uint8_t *s8=utf8+utf8Count; @@ -653,11 +656,12 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi for(i=0; i<stringsLength; ++i) { int32_t overlap=spanLengths[i]; if(overlap==ALL_CP_CONTAINED) { - continue; // Irrelevant string. + continue; // Irrelevant string. (Also the empty string.) } const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); const UChar *s16=string.getBuffer(); int32_t length16=string.length(); + U_ASSERT(length>0); // Try to match this string at pos-overlap..pos. if(overlap>=LONG_SPAN) { @@ -697,6 +701,9 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); const UChar *s16=string.getBuffer(); int32_t length16=string.length(); + if (length16==0) { + continue; // skip the empty string + } // Try to match this string at pos-overlap..pos. if(overlap>=LONG_SPAN) { @@ -817,11 +824,12 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC for(i=0; i<stringsLength; ++i) { int32_t overlap=spanBackLengths[i]; if(overlap==ALL_CP_CONTAINED) { - continue; // Irrelevant string. + continue; // Irrelevant string. (Also the empty string.) } const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); const UChar *s16=string.getBuffer(); int32_t length16=string.length(); + U_ASSERT(length>0); // Try to match this string at pos-(length16-overlap)..pos-length16. if(overlap>=LONG_SPAN) { @@ -863,6 +871,9 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); const UChar *s16=string.getBuffer(); int32_t length16=string.length(); + if (length16==0) { + continue; // skip the empty string + } // Try to match this string at pos-(length16-overlap)..pos-length16. if(overlap>=LONG_SPAN) { @@ -1358,11 +1369,12 @@ int32_t UnicodeSetStringSpan::spanNot(const UChar *s, int32_t length) const { // Try to match the strings at pos. for(i=0; i<stringsLength; ++i) { if(spanLengths[i]==ALL_CP_CONTAINED) { - continue; // Irrelevant string. + continue; // Irrelevant string. (Also the empty string.) } const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); const UChar *s16=string.getBuffer(); int32_t length16=string.length(); + U_ASSERT(length>0); if(length16<=rest && matches16CPB(s, pos, length, s16, length16)) { return pos; // There is a set element at pos. } @@ -1401,11 +1413,12 @@ int32_t UnicodeSetStringSpan::spanNotBack(const UChar *s, int32_t length) const // it is easier and we only need to know whether the string is irrelevant // which is the same in either array. if(spanLengths[i]==ALL_CP_CONTAINED) { - continue; // Irrelevant string. + continue; // Irrelevant string. (Also the empty string.) } const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); const UChar *s16=string.getBuffer(); int32_t length16=string.length(); + U_ASSERT(length>0); if(length16<=pos && matches16CPB(s, pos-length16, length, s16, length16)) { return pos; // There is a set element at pos. } |