diff options
author | bruvzg <7645683+bruvzg@users.noreply.github.com> | 2021-10-28 09:15:28 +0300 |
---|---|---|
committer | bruvzg <7645683+bruvzg@users.noreply.github.com> | 2021-10-28 09:15:28 +0300 |
commit | 44a241b241af6453d4459c79b1a562c447e36636 (patch) | |
tree | 0b90326241b6e647b9f8991cddeee5e8d4d93e9f /thirdparty/icu4c/common/brkeng.cpp | |
parent | 157cba39331c5ca945c8c3bb2173c5363550a680 (diff) |
ICU: Update to version 70.1
Diffstat (limited to 'thirdparty/icu4c/common/brkeng.cpp')
-rw-r--r-- | thirdparty/icu4c/common/brkeng.cpp | 39 |
1 files changed, 29 insertions, 10 deletions
diff --git a/thirdparty/icu4c/common/brkeng.cpp b/thirdparty/icu4c/common/brkeng.cpp index 78492db662..52e9c53621 100644 --- a/thirdparty/icu4c/common/brkeng.cpp +++ b/thirdparty/icu4c/common/brkeng.cpp @@ -25,6 +25,7 @@ #include "brkeng.h" #include "cmemory.h" #include "dictbe.h" +#include "lstmbe.h" #include "charstr.h" #include "dictionarydata.h" #include "mutex.h" @@ -77,7 +78,9 @@ int32_t UnhandledEngine::findBreaks( UText *text, int32_t /* startPos */, int32_t endPos, - UVector32 &/*foundBreaks*/ ) const { + UVector32 &/*foundBreaks*/, + UErrorCode &status) const { + if (U_FAILURE(status)) return 0; UChar32 c = utext_current32(text); while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) { utext_next32(text); // TODO: recast loop to work with post-increment operations. @@ -132,14 +135,13 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) { static UMutex gBreakEngineMutex; Mutex m(&gBreakEngineMutex); - if (fEngines == NULL) { - UStack *engines = new UStack(_deleteEngine, NULL, status); - if (U_FAILURE(status) || engines == NULL) { + if (fEngines == nullptr) { + LocalPointer<UStack> engines(new UStack(_deleteEngine, nullptr, status), status); + if (U_FAILURE(status) ) { // Note: no way to return error code to caller. - delete engines; - return NULL; + return nullptr; } - fEngines = engines; + fEngines = engines.orphan(); } else { int32_t i = fEngines->size(); while (--i >= 0) { @@ -152,10 +154,10 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) { // We didn't find an engine. Create one. lbe = loadEngineFor(c); - if (lbe != NULL) { + if (lbe != nullptr) { fEngines->push((void *)lbe, status); } - return lbe; + return U_SUCCESS(status) ? lbe : nullptr; } const LanguageBreakEngine * @@ -163,9 +165,26 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c) { UErrorCode status = U_ZERO_ERROR; UScriptCode code = uscript_getScript(c, &status); if (U_SUCCESS(status)) { + const LanguageBreakEngine *engine = nullptr; + // Try to use LSTM first + const LSTMData *data = CreateLSTMDataForScript(code, status); + if (U_SUCCESS(status)) { + if (data != nullptr) { + engine = CreateLSTMBreakEngine(code, data, status); + if (U_SUCCESS(status) && engine != nullptr) { + return engine; + } + if (engine != nullptr) { + delete engine; + engine = nullptr; + } else { + DeleteLSTMData(data); + } + } + } + status = U_ZERO_ERROR; // fallback to dictionary based DictionaryMatcher *m = loadDictionaryMatcherFor(code); if (m != NULL) { - const LanguageBreakEngine *engine = NULL; switch(code) { case USCRIPT_THAI: engine = new ThaiBreakEngine(m, status); |