diff options
Diffstat (limited to 'thirdparty/icu4c/common/dictbe.h')
-rw-r--r-- | thirdparty/icu4c/common/dictbe.h | 27 |
1 files changed, 20 insertions, 7 deletions
diff --git a/thirdparty/icu4c/common/dictbe.h b/thirdparty/icu4c/common/dictbe.h index 4e70ed3817..ca1a3c28b7 100644 --- a/thirdparty/icu4c/common/dictbe.h +++ b/thirdparty/icu4c/common/dictbe.h @@ -15,6 +15,7 @@ #include "unicode/utext.h" #include "brkeng.h" +#include "hash.h" #include "uvectr32.h" U_NAMESPACE_BEGIN @@ -80,6 +81,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine { int32_t startPos, int32_t endPos, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status ) const override; protected: @@ -105,6 +107,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine { int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const = 0; }; @@ -127,7 +130,6 @@ class ThaiBreakEngine : public DictionaryBreakEngine { * @internal */ - UnicodeSet fThaiWordSet; UnicodeSet fEndWordSet; UnicodeSet fBeginWordSet; UnicodeSet fSuffixSet; @@ -164,6 +166,7 @@ class ThaiBreakEngine : public DictionaryBreakEngine { int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const override; }; @@ -186,7 +189,6 @@ class LaoBreakEngine : public DictionaryBreakEngine { * @internal */ - UnicodeSet fLaoWordSet; UnicodeSet fEndWordSet; UnicodeSet fBeginWordSet; UnicodeSet fMarkSet; @@ -222,6 +224,7 @@ class LaoBreakEngine : public DictionaryBreakEngine { int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const override; }; @@ -244,7 +247,6 @@ class BurmeseBreakEngine : public DictionaryBreakEngine { * @internal */ - UnicodeSet fBurmeseWordSet; UnicodeSet fEndWordSet; UnicodeSet fBeginWordSet; UnicodeSet fMarkSet; @@ -280,6 +282,7 @@ class BurmeseBreakEngine : public DictionaryBreakEngine { int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const override; }; @@ -302,7 +305,6 @@ class KhmerBreakEngine : public DictionaryBreakEngine { * @internal */ - UnicodeSet fKhmerWordSet; UnicodeSet fEndWordSet; UnicodeSet fBeginWordSet; UnicodeSet fMarkSet; @@ -338,6 +340,7 @@ class KhmerBreakEngine : public DictionaryBreakEngine { int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const override; }; @@ -366,13 +369,22 @@ class CjkBreakEngine : public DictionaryBreakEngine { * @internal */ UnicodeSet fHangulWordSet; - UnicodeSet fHanWordSet; - UnicodeSet fKatakanaWordSet; - UnicodeSet fHiraganaWordSet; + UnicodeSet fDigitOrOpenPunctuationOrAlphabetSet; + UnicodeSet fClosePunctuationSet; DictionaryMatcher *fDictionary; const Normalizer2 *nfkcNorm2; + private: + // Load Japanese extensions. + void loadJapaneseExtensions(UErrorCode& error); + // Load Japanese Hiragana. + void loadHiragana(UErrorCode& error); + // Initialize fSkipSet by loading Japanese Hiragana and extensions. + void initJapanesePhraseParameter(UErrorCode& error); + + Hashtable fSkipSet; + public: /** @@ -404,6 +416,7 @@ class CjkBreakEngine : public DictionaryBreakEngine { int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const override; }; |