summary refs log tree commit diff
path: root/thirdparty/icu4c/common/dictbe.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/icu4c/common/dictbe.h')
-rw-r--r-- thirdparty/icu4c/common/dictbe.h 27
1 files changed, 20 insertions, 7 deletions
diff --git a/thirdparty/icu4c/common/dictbe.h b/thirdparty/icu4c/common/dictbe.h
index 4e70ed3817..ca1a3c28b7 100644
--- a/thirdparty/icu4c/common/dictbe.h
+++ b/thirdparty/icu4c/common/dictbe.h
@@ -15,6 +15,7 @@
#include "unicode/utext.h"
#include "brkeng.h"
+#include "hash.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
@@ -80,6 +81,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks,
+ UBool isPhraseBreaking,
UErrorCode& status ) const override;
protected:
@@ -105,6 +107,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
+ UBool isPhraseBreaking,
UErrorCode& status) const = 0;
};
@@ -127,7 +130,6 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
* @internal
*/
- UnicodeSet fThaiWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fSuffixSet;
@@ -164,6 +166,7 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
+ UBool isPhraseBreaking,
UErrorCode& status) const override;
};
@@ -186,7 +189,6 @@ class LaoBreakEngine : public DictionaryBreakEngine {
* @internal
*/
- UnicodeSet fLaoWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fMarkSet;
@@ -222,6 +224,7 @@ class LaoBreakEngine : public DictionaryBreakEngine {
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
+ UBool isPhraseBreaking,
UErrorCode& status) const override;
};
@@ -244,7 +247,6 @@ class BurmeseBreakEngine : public DictionaryBreakEngine {
* @internal
*/
- UnicodeSet fBurmeseWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fMarkSet;
@@ -280,6 +282,7 @@ class BurmeseBreakEngine : public DictionaryBreakEngine {
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
+ UBool isPhraseBreaking,
UErrorCode& status) const override;
};
@@ -302,7 +305,6 @@ class KhmerBreakEngine : public DictionaryBreakEngine {
* @internal
*/
- UnicodeSet fKhmerWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fMarkSet;
@@ -338,6 +340,7 @@ class KhmerBreakEngine : public DictionaryBreakEngine {
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
+ UBool isPhraseBreaking,
UErrorCode& status) const override;
};
@@ -366,13 +369,22 @@ class CjkBreakEngine : public DictionaryBreakEngine {
* @internal
*/
UnicodeSet fHangulWordSet;
- UnicodeSet fHanWordSet;
- UnicodeSet fKatakanaWordSet;
- UnicodeSet fHiraganaWordSet;
+ UnicodeSet fDigitOrOpenPunctuationOrAlphabetSet;
+ UnicodeSet fClosePunctuationSet;
DictionaryMatcher *fDictionary;
const Normalizer2 *nfkcNorm2;
+ private:
+ // Load Japanese extensions.
+ void loadJapaneseExtensions(UErrorCode& error);
+ // Load Japanese Hiragana.
+ void loadHiragana(UErrorCode& error);
+ // Initialize fSkipSet by loading Japanese Hiragana and extensions.
+ void initJapanesePhraseParameter(UErrorCode& error);
+
+ Hashtable fSkipSet;
+
public:
/**
@@ -404,6 +416,7 @@ class CjkBreakEngine : public DictionaryBreakEngine {
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
+ UBool isPhraseBreaking,
UErrorCode& status) const override;
};