Go to the documentation of this file.
24 #if !UCONFIG_NO_BREAK_ITERATION
38 struct RBBIDataHeader;
39 class RuleBasedBreakIteratorTables;
41 class RBBIDataWrapper;
43 class LanguageBreakEngine;
44 class UnhandledEngine;
45 struct RBBIStateTable;
173 #ifndef U_HIDE_INTERNAL_API
210 friend class RBBIRuleBuilder;
336 virtual int32_t hashCode(
void)
const;
430 virtual int32_t
first(
void);
437 virtual int32_t
last(
void);
449 virtual int32_t
next(int32_t n);
456 virtual int32_t
next(
void);
472 virtual int32_t
following(int32_t offset);
481 virtual int32_t
preceding(int32_t offset);
498 virtual int32_t
current(
void)
const;
584 static UClassID U_EXPORT2 getStaticClassID(
void);
634 virtual const uint8_t *getBinaryRules(uint32_t &length);
673 virtual void reset(
void);
690 virtual int32_t getBreakType()
const;
697 virtual void setBreakType(int32_t type);
699 #ifndef U_HIDE_INTERNAL_API
718 int32_t handlePrevious(
const RBBIStateTable *statetable);
729 int32_t handleNext(
const RBBIStateTable *statetable);
733 #ifndef U_HIDE_INTERNAL_API
748 int32_t checkDictionary(int32_t startPos, int32_t endPos,
UBool reverse);
759 const LanguageBreakEngine *getLanguageBreakEngine(
UChar32 c);
764 void makeRuleStatusValid();
UStack * fLanguageBreakEngines
If present, UStack of LanguageBreakEngine objects that might handle dictionary characters.
StringCharacterIterator * fSCharIter
When the input text is provided by a UnicodeString, this will point to a characterIterator that wraps...
The BreakIterator class implements methods for finding the location of boundaries in text.
C API: Parse Error Information.
int32_t * fCachedBreakPositions
When a range of characters is divided up using the dictionary, the break positions that are discovere...
Basic definitions for ICU, for both C and C++ APIs.
virtual int32_t current(void) const =0
Return character index of the current iterator position within the text.
int8_t UBool
The ICU boolean type.
RBBIDataWrapper * fData
The rule data for this BreakIterator instance.
virtual int32_t getRuleStatus() const
For RuleBasedBreakIterators, return the status tag from the break rule that determined the most recen...
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
int32_t fPositionInCache
if fCachedBreakPositions is not null, this indicates which item in the cache the current iteration po...
virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)=0
Deprecated functionality.
virtual void setText(const UnicodeString &text)=0
Change the text over which this operates.
EDontAdopt
Constant to be used in the constructor RuleBasedBreakIterator(RBBIDataHeader*, EDontAdopt,...
virtual UBool isBoundary(int32_t offset)=0
Return true if the specified position is a boundary position.
A UParseError struct is used to return detailed information about parsing errors.
virtual UClassID getDynamicClassID(void) const =0
Return a polymorphic class ID for this object.
virtual int32_t preceding(int32_t offset)=0
Set the iterator position to the first boundary preceding the specified offset.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
C++ API: String Character Iterator.
int32_t fLastRuleStatusIndex
Index of the Rule {tag} values for the most recent match.
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points...
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
virtual BreakIterator & refreshInputText(UText *input, UErrorCode &status)=0
Set the subject text string upon which the break iterator is operating without changing any other asp...
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
int32_t fBreakType
The type of the break iterator, or -1 if it has not been set.
int32_t fNumCachedBreakPositions
The number of elements in fCachedBreakPositions.
A subclass of BreakIterator whose behavior is specified using a list of rules.
C API: Data loading interface.
virtual int32_t following(int32_t offset)=0
Advance the iterator to the first boundary following the specified offset.
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
C++ API: UChar Character Iterator.
virtual void adoptText(CharacterIterator *it)=0
Change the text over which this operates.
UBool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
UnhandledEngine * fUnhandledBreakEngine
If present, the special LanguageBreakEngine used for handling characters that are in the dictionary s...
struct UDataMemory UDataMemory
Forward declaration of the data memory type.
virtual int32_t last(void)=0
Set the iterator position to the index immediately BEYOND the last character in the text being scanne...
uint32_t fDictionaryCharCount
Counter for the number of characters encountered with the "dictionary" flag set.
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status)
For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) that determined the m...
Abstract class that defines an API for iteration on text objects.
UText * fText
The UText through which this BreakIterator accesses the text.
UBool fLastStatusIndexValid
Rule tag value valid flag.
virtual BreakIterator * clone(void) const =0
Return a polymorphic copy of this object.
virtual UBool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
virtual int32_t first(void)=0
Sets the current iteration position to the beginning of the text, position zero.
virtual UText * getUText(UText *fillIn, UErrorCode &status) const =0
Get a UText for the text being analyzed.
virtual int32_t next(void)=0
Advance the iterator to the boundary following the current boundary.
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points...
virtual int32_t previous(void)=0
Set the iterator position to the boundary preceding the current boundary.
virtual CharacterIterator & getText(void) const =0
Return a CharacterIterator over the text being analyzed.
#define U_NAMESPACE_BEGIN
UCharCharacterIterator * fDCharIter
When the input text is provided by a UText, this dummy CharacterIterator over an empty string will be...
CharacterIterator * fCharIter
A character iterator that refers to the same text as the UText, above.