summary | refs | log | tree | commit | diff
path: root/thirdparty/icu4c/common/brkeng.cpp
diff options
context:
space:
mode:
author: bruvzg <7645683+bruvzg@users.noreply.github.com> 2021-10-28 09:15:28 +0300
committer: bruvzg <7645683+bruvzg@users.noreply.github.com> 2021-10-28 09:15:28 +0300
commit: 44a241b241af6453d4459c79b1a562c447e36636 (patch)
tree: 0b90326241b6e647b9f8991cddeee5e8d4d93e9f /thirdparty/icu4c/common/brkeng.cpp
parent: 157cba39331c5ca945c8c3bb2173c5363550a680 (diff)
ICU: Update to version 70.1
Diffstat (limited to 'thirdparty/icu4c/common/brkeng.cpp')
-rw-r--r-- thirdparty/icu4c/common/brkeng.cpp 39
1 files changed, 29 insertions, 10 deletions
diff --git a/thirdparty/icu4c/common/brkeng.cpp b/thirdparty/icu4c/common/brkeng.cpp
index 78492db662..52e9c53621 100644
--- a/thirdparty/icu4c/common/brkeng.cpp
+++ b/thirdparty/icu4c/common/brkeng.cpp
@@ -25,6 +25,7 @@
#include "brkeng.h"
#include "cmemory.h"
#include "dictbe.h"
+#include "lstmbe.h"
#include "charstr.h"
#include "dictionarydata.h"
#include "mutex.h"
@@ -77,7 +78,9 @@ int32_t
UnhandledEngine::findBreaks( UText *text,
int32_t /* startPos */,
int32_t endPos,
- UVector32 &/*foundBreaks*/ ) const {
+ UVector32 &/*foundBreaks*/,
+ UErrorCode &status) const {
+ if (U_FAILURE(status)) return 0;
UChar32 c = utext_current32(text);
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
@@ -132,14 +135,13 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) {
static UMutex gBreakEngineMutex;
Mutex m(&gBreakEngineMutex);
- if (fEngines == NULL) {
- UStack *engines = new UStack(_deleteEngine, NULL, status);
- if (U_FAILURE(status) || engines == NULL) {
+ if (fEngines == nullptr) {
+ LocalPointer<UStack> engines(new UStack(_deleteEngine, nullptr, status), status);
+ if (U_FAILURE(status) ) {
// Note: no way to return error code to caller.
- delete engines;
- return NULL;
+ return nullptr;
}
- fEngines = engines;
+ fEngines = engines.orphan();
} else {
int32_t i = fEngines->size();
while (--i >= 0) {
@@ -152,10 +154,10 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) {
// We didn't find an engine. Create one.
lbe = loadEngineFor(c);
- if (lbe != NULL) {
+ if (lbe != nullptr) {
fEngines->push((void *)lbe, status);
}
- return lbe;
+ return U_SUCCESS(status) ? lbe : nullptr;
}
const LanguageBreakEngine *
@@ -163,9 +165,26 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
UErrorCode status = U_ZERO_ERROR;
UScriptCode code = uscript_getScript(c, &status);
if (U_SUCCESS(status)) {
+ const LanguageBreakEngine *engine = nullptr;
+ // Try to use LSTM first
+ const LSTMData *data = CreateLSTMDataForScript(code, status);
+ if (U_SUCCESS(status)) {
+ if (data != nullptr) {
+ engine = CreateLSTMBreakEngine(code, data, status);
+ if (U_SUCCESS(status) && engine != nullptr) {
+ return engine;
+ }
+ if (engine != nullptr) {
+ delete engine;
+ engine = nullptr;
+ } else {
+ DeleteLSTMData(data);
+ }
+ }
+ }
+ status = U_ZERO_ERROR; // fallback to dictionary based
DictionaryMatcher *m = loadDictionaryMatcherFor(code);
if (m != NULL) {
- const LanguageBreakEngine *engine = NULL;
switch(code) {
case USCRIPT_THAI:
engine = new ThaiBreakEngine(m, status);