00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #ifndef RBBI_H
00017 #define RBBI_H
00018
00019 #include "unicode/utypes.h"
00020
00021 #if U_SHOW_CPLUSPLUS_API
00022
00028 #if !UCONFIG_NO_BREAK_ITERATION
00029
00030 #include "unicode/brkiter.h"
00031 #include "unicode/udata.h"
00032 #include "unicode/parseerr.h"
00033 #include "unicode/schriter.h"
00034
/**
 * Forward declaration at global scope (it precedes U_NAMESPACE_BEGIN).
 * This header only uses the type as `const UCPTrie *` in the PTrieFunc typedef,
 * so the full definition is not required here.
 */
00035 struct UCPTrie;
00036
00037 U_NAMESPACE_BEGIN
00038
/**
 * Forward declarations. RuleBasedBreakIterator refers to each of these types
 * only through pointers (members, parameters or return values), so their
 * complete definitions are not needed in this header.
 */
00040 class LanguageBreakEngine;
00041 struct RBBIDataHeader;
00042 class RBBIDataWrapper;
00043 class UnhandledEngine;
00044 class UStack;
00045
/**
 * A BreakIterator subclass, exported with U_COMMON_API, that is constructed
 * from break rules supplied either as rule source text (a UnicodeString) or in
 * a precompiled binary form (a byte array or a UDataMemory image).
 *
 * NOTE(review): only declarations are visible in this header. Member and method
 * descriptions below are derived from the declared types, names and `override`
 * markers; anything marked "presumably" must be confirmed against the
 * implementation file.
 */
00057 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
00058
00059 private:
/** The text being iterated over, held by value as a UText (role inferred from the name). */
00064 UText fText;
00065
/*
 * fData is public unless U_HIDE_INTERNAL_API is defined; when that macro is
 * defined the `public:` label below is compiled out and fData stays in the
 * preceding private section.
 */
00066 #ifndef U_HIDE_INTERNAL_API
00067 public:
00068 #endif
00069
/** Wrapper around the rule data used by this iterator (presumably the compiled rules - confirm). */
00074 RBBIDataWrapper *fData;
00075 private:
00076
/** Position of the iterator as a 32-bit value (presumably what current() reports - confirm). */
00081 int32_t fPosition;
00082
/** Index used to look up the rule status for the current position (the indexed table is not visible here). */
00086 int32_t fRuleStatusIndex;
00087
/** Nested helper class, declared here and defined elsewhere; this object holds a pointer to one instance. */
00091 class BreakCache;
00092 BreakCache *fBreakCache;
00093
/** Second nested helper class, likewise defined elsewhere and held by pointer. */
00098 class DictionaryCache;
00099 DictionaryCache *fDictionaryCache;
00100
/** Stack of language break engines (presumably the engines handed out by getLanguageBreakEngine() - confirm). */
00108 UStack *fLanguageBreakEngines;
00109
/** Engine for characters that no other language break engine handles (inferred from the type name). */
00117 UnhandledEngine *fUnhandledBreakEngine;
00118
/** Unsigned counter of dictionary characters (what increments it is not visible in this header). */
00124 uint32_t fDictionaryCharCount;
00125
/** CharacterIterator pointer (presumably the object returned by getText() - confirm). */
00131 CharacterIterator *fCharIter;
00132
/** A StringCharacterIterator stored by value inside the object, so it needs no separate allocation. */
00138 StringCharacterIterator fSCharIter;
00139
/** Boolean "done" flag for iteration (exact meaning not determinable from this header). */
00143 UBool fDone;
00144
/** Pointer to an int32_t buffer of look-ahead matches (size and ownership are not visible here). */
00148 int32_t *fLookAheadMatches;
00149
/** Phrase-breaking flag; corresponds by name to the isPhraseBreaking constructor argument below. */
00153 UBool fIsPhraseBreaking;
00154
00155
00156
00157
00158
/**
 * Private constructor taking a raw rule data header.
 * @param data   Rule data header to build the iterator from.
 * @param status ICU error code, passed by reference.
 */
00169 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
00170
/**
 * Private constructor taking a UDataMemory image plus a phrase-breaking flag.
 * @param image            Data memory image holding the rule data.
 * @param isPhraseBreaking Phrase-breaking flag for the new iterator.
 * @param status           ICU error code, passed by reference.
 */
00184 RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status);
00185
/** Friends: these classes may use the private constructors and members declared above. */
00187 friend class RBBIRuleBuilder;
00189 friend class BreakIterator;
00190
00191 public:
00192
/** Default constructor. */
00197 RuleBasedBreakIterator();
00198
/**
 * Copy constructor.
 * @param that The iterator to copy.
 */
00205 RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
00206
/**
 * Constructs an iterator from rule source text.
 * @param rules      The break rules, as text.
 * @param parseError Receives parse error details (output parameter, judging by the non-const reference).
 * @param status     ICU error code, passed by reference.
 */
00215 RuleBasedBreakIterator( const UnicodeString &rules,
00216 UParseError &parseError,
00217 UErrorCode &status);
00218
/**
 * Constructs an iterator from precompiled rules held in a byte array.
 * @param compiledRules Pointer to the compiled rule bytes.
 * @param ruleLength    Length associated with compiledRules (presumably in bytes - confirm).
 * @param status        ICU error code, passed by reference.
 */
00242 RuleBasedBreakIterator(const uint8_t *compiledRules,
00243 uint32_t ruleLength,
00244 UErrorCode &status);
00245
/**
 * Constructs an iterator from a UDataMemory image.
 * @param image  Data memory image holding the rule data (ownership transfer is not visible here).
 * @param status ICU error code, passed by reference.
 */
00258 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
00259
/** Virtual destructor. */
00264 virtual ~RuleBasedBreakIterator();
00265
/**
 * Copy assignment.
 * @param that The iterator to assign from.
 * @return Reference to a RuleBasedBreakIterator (conventionally *this).
 */
00273 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
00274
/**
 * Equality comparison against any BreakIterator; overrides the base-class operator.
 * @param that The iterator to compare with.
 * @return true when the two iterators compare equal.
 */
00283 virtual bool operator==(const BreakIterator& that) const override;
00284
/**
 * Inequality comparison; defined inline after the class as the negation of operator==.
 * @param that The iterator to compare with.
 */
00292 inline bool operator!=(const BreakIterator& that) const;
00293
/**
 * Overrides BreakIterator::clone() with a covariant return type.
 * @return Pointer to a RuleBasedBreakIterator (ownership follows the base-class contract).
 */
00304 virtual RuleBasedBreakIterator* clone() const override;
00305
/** Returns a 32-bit hash code for this iterator. Declared virtual but not marked override here. */
00311 virtual int32_t hashCode(void) const;
00312
/** Returns a reference to the rules as a UnicodeString (presumably the rule source text - confirm). */
00318 virtual const UnicodeString& getRules(void) const;
00319
00320
00321
00322
00323
/** BreakIterator override: returns the text as a CharacterIterator reference. */
00349 virtual CharacterIterator& getText(void) const override;
00350
00351
/**
 * BreakIterator override: returns the text as a UText.
 * @param fillIn A UText supplied by the caller (presumably reused for the result - confirm).
 * @param status ICU error code, passed by reference.
 */
00366 virtual UText *getUText(UText *fillIn, UErrorCode &status) const override;
00367
/**
 * BreakIterator override: sets the text from a CharacterIterator.
 * @param newText The new text; the method name indicates the iterator takes ownership.
 */
00375 virtual void adoptText(CharacterIterator* newText) override;
00376
/**
 * BreakIterator override: sets the text from a UnicodeString.
 * @param newText The new text (whether it is copied or referenced is not visible here).
 */
00388 virtual void setText(const UnicodeString& newText) override;
00389
/**
 * BreakIterator override: sets the text from a UText.
 * @param text   The new text.
 * @param status ICU error code, passed by reference.
 */
00403 virtual void setText(UText *text, UErrorCode &status) override;
00404
/** BreakIterator override: returns an int32_t (presumably the offset of the first boundary). */
00410 virtual int32_t first(void) override;
00411
/** BreakIterator override: returns an int32_t (presumably the offset of the last boundary). */
00417 virtual int32_t last(void) override;
00418
/**
 * BreakIterator override taking a count.
 * @param n Count argument (presumably the number of boundaries to advance - confirm sign handling).
 */
00429 virtual int32_t next(int32_t n) override;
00430
/** BreakIterator override: no-argument form of next(). */
00436 virtual int32_t next(void) override;
00437
/** BreakIterator override: counterpart of next() in the backward direction (per the name). */
00443 virtual int32_t previous(void) override;
00444
/**
 * BreakIterator override.
 * @param offset Text offset to search from (presumably returns the first boundary after it).
 */
00452 virtual int32_t following(int32_t offset) override;
00453
/**
 * BreakIterator override.
 * @param offset Text offset to search from (presumably returns the last boundary before it).
 */
00461 virtual int32_t preceding(int32_t offset) override;
00462
/**
 * BreakIterator override: reports whether the given offset is a boundary.
 * @param offset Text offset to test.
 */
00471 virtual UBool isBoundary(int32_t offset) override;
00472
/** BreakIterator override: const accessor returning an int32_t (presumably the current position). */
00481 virtual int32_t current(void) const override;
00482
00483
/** BreakIterator override: returns a single int32_t rule status value. */
00515 virtual int32_t getRuleStatus() const override;
00516
/**
 * BreakIterator override: writes rule status values into a caller-supplied array.
 * @param fillInVec Destination array.
 * @param capacity  Capacity of fillInVec, in elements.
 * @param status    ICU error code, passed by reference.
 * @return An int32_t (presumably the number of status values available - confirm).
 */
00540 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) override;
00541
/** Override returning the UClassID for this object's dynamic type. */
00553 virtual UClassID getDynamicClassID(void) const override;
00554
/** Static accessor returning the UClassID for this class. */
00566 static UClassID U_EXPORT2 getStaticClassID(void);
00567
00568 #ifndef U_FORCE_HIDE_DEPRECATED_API
00569
/**
 * BreakIterator override, compiled out when U_FORCE_HIDE_DEPRECATED_API is
 * defined (the guard name marks it as deprecated API).
 * @param stackBuffer Caller-supplied buffer.
 * @param BufferSize  Buffer size, passed by non-const reference (so it may be updated).
 * @param status      ICU error code, passed by reference.
 */
00595 virtual RuleBasedBreakIterator *createBufferClone(void *stackBuffer,
00596 int32_t &BufferSize,
00597 UErrorCode &status) override;
00598 #endif // U_FORCE_HIDE_DEPRECATED_API
00599
/**
 * Returns a pointer to the rules in binary form.
 * @param length Receives a length value through the reference (presumably the byte count of the returned data).
 * @return Pointer to const bytes (lifetime and ownership are not visible in this header).
 */
00617 virtual const uint8_t *getBinaryRules(uint32_t &length);
00618
/**
 * BreakIterator override with a covariant return type.
 * @param input  The UText to refresh from.
 * @param status ICU error code, passed by reference.
 * @return Reference to a RuleBasedBreakIterator (conventionally *this).
 */
00644 virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status) override;
00645
00646
00647 private:
00648
00649
00650
/** Private helper: resets iterator state (which fields are reset is not visible here). */
00656 void reset(void);
00657
/**
 * Private helper: initialisation routine.
 * @param status ICU error code, passed by reference.
 */
00662 void init(UErrorCode &status);
00663
/**
 * Private helper, non-template entry point.
 * @param fromPosition Starting position for the backward "safe" search (per the name).
 * @return An int32_t position.
 */
00673 int32_t handleSafePrevious(int32_t fromPosition);
00674
/** Private helper, non-template entry point; returns an int32_t (presumably the next boundary position). */
00687 int32_t handleNext();
00688
00689
00690
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700
00701
00702
/** Pointer to a trie lookup function: takes a UCPTrie and a code point, returns a 16-bit value. */
00703 typedef uint16_t (*PTrieFunc)(const UCPTrie *, UChar32);
00704
/*
 * Template overloads of the two helpers above, parameterised at compile time on
 * a row type and a trie lookup function. Presumably the non-template versions
 * select one of these instantiations - confirm in the implementation.
 */
00705 template<typename RowType, PTrieFunc trieFunc>
00706 int32_t handleSafePrevious(int32_t fromPosition);
00707
00708 template<typename RowType, PTrieFunc trieFunc>
00709 int32_t handleNext();
00710
00711
/**
 * Private helper returning the language break engine for a character.
 * @param c The code point to find an engine for.
 * @return Pointer to a const LanguageBreakEngine (may be related to fLanguageBreakEngines - confirm).
 */
00718 const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
00719
00720 public:
00721 #ifndef U_HIDE_INTERNAL_API
00722
/** Internal API, hidden when U_HIDE_INTERNAL_API is defined: dumps the cache (presumably for debugging). */
00726 void dumpCache();
00727
/** Internal API, hidden when U_HIDE_INTERNAL_API is defined: dumps the tables (presumably for debugging). */
00732 void dumpTables();
00733 #endif
00734 };
00735
00736
00737
00738
00739
00740
00741
/**
 * Inequality for break iterators, expressed as the logical negation of the
 * virtual operator== so the two comparisons can never disagree.
 * @param that The iterator to compare with.
 * @return true when operator== reports the iterators as not equal.
 */
00742 inline bool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
00743 return !(this->operator==(that));
00744 }
00745
00746 U_NAMESPACE_END
00747
00748 #endif
00749
00750 #endif
00751
00752 #endif