ICU 57.1
rbbi.h
Go to the documentation of this file.
1/*
2***************************************************************************
3* Copyright (C) 1999-2014 International Business Machines Corporation *
4* and others. All rights reserved. *
5***************************************************************************
6
7**********************************************************************
8* Date Name Description
9* 10/22/99 alan Creation.
10* 11/11/99 rgillam Complete port from Java.
11**********************************************************************
12*/
13
14#ifndef RBBI_H
15#define RBBI_H
16
17#include "unicode/utypes.h"
18
23
24#if !UCONFIG_NO_BREAK_ITERATION
25
26#include "unicode/brkiter.h"
27#include "unicode/udata.h"
28#include "unicode/parseerr.h"
29#include "unicode/schriter.h"
30#include "unicode/uchriter.h"
31
32
33struct UTrie;
34
36
38struct RBBIDataHeader;
39class RuleBasedBreakIteratorTables;
40class BreakIterator;
41class RBBIDataWrapper;
42class UStack;
43class LanguageBreakEngine;
44class UnhandledEngine;
45struct RBBIStateTable;
46
47
48
49
66
67protected:
73
80
87
94
99 RBBIDataWrapper *fData;
100
105
113
120
129
135
142
151
159 UnhandledEngine *fUnhandledBreakEngine;
160
166 int32_t fBreakType;
167
168protected:
169 //=======================================================================
170 // constructors
171 //=======================================================================
172
173#ifndef U_HIDE_INTERNAL_API
183 kDontAdopt
184 };
185
196 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
197
206 RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status);
207#endif /* U_HIDE_INTERNAL_API */
208
209
210 friend class RBBIRuleBuilder;
212 friend class BreakIterator;
213
214
215
216public:
217
223
231
241 UParseError &parseError,
242 UErrorCode &status);
243
267 RuleBasedBreakIterator(const uint8_t *compiledRules,
268 uint32_t ruleLength,
269 UErrorCode &status);
270
284
290
299
308 virtual UBool operator==(const BreakIterator& that) const;
309
317 UBool operator!=(const BreakIterator& that) const;
318
329 virtual BreakIterator* clone() const;
330
336 virtual int32_t hashCode(void) const;
337
343 virtual const UnicodeString& getRules(void) const;
344
345 //=======================================================================
346 // BreakIterator overrides
347 //=======================================================================
348
374 virtual CharacterIterator& getText(void) const;
375
376
391 virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
392
400 virtual void adoptText(CharacterIterator* newText);
401
408 virtual void setText(const UnicodeString& newText);
409
423 virtual void setText(UText *text, UErrorCode &status);
424
430 virtual int32_t first(void);
431
437 virtual int32_t last(void);
438
449 virtual int32_t next(int32_t n);
450
456 virtual int32_t next(void);
457
463 virtual int32_t previous(void);
464
472 virtual int32_t following(int32_t offset);
473
481 virtual int32_t preceding(int32_t offset);
482
491 virtual UBool isBoundary(int32_t offset);
492
498 virtual int32_t current(void) const;
499
500
533 virtual int32_t getRuleStatus() const;
534
558 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
559
571 virtual UClassID getDynamicClassID(void) const;
572
584 static UClassID U_EXPORT2 getStaticClassID(void);
585
612 virtual BreakIterator * createBufferClone(void *stackBuffer,
613 int32_t &BufferSize,
614 UErrorCode &status);
615
616
634 virtual const uint8_t *getBinaryRules(uint32_t &length);
635
662
663
664protected:
665 //=======================================================================
666 // implementation
667 //=======================================================================
673 virtual void reset(void);
674
675#if 0
684 virtual UBool isDictionaryChar(UChar32);
685
690 virtual int32_t getBreakType() const;
691#endif
692
697 virtual void setBreakType(int32_t type);
698
699#ifndef U_HIDE_INTERNAL_API
704 void init();
705#endif /* U_HIDE_INTERNAL_API */
706
707private:
708
718 int32_t handlePrevious(const RBBIStateTable *statetable);
719
729 int32_t handleNext(const RBBIStateTable *statetable);
730
731protected:
732
733#ifndef U_HIDE_INTERNAL_API
748 int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
749#endif /* U_HIDE_INTERNAL_API */
750
751private:
752
759 const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
760
764 void makeRuleStatusValid();
765
766};
767
768//------------------------------------------------------------------------------
769//
770// Inline Functions Definitions ...
771//
772//------------------------------------------------------------------------------
773
775 return !operator==(that);
776}
777
779
780#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
781
782#endif
C++ API: Break Iterator.
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition brkiter.h:100
UBool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
Definition brkiter.h:129
Abstract class that defines an API for iteration on text objects.
Definition chariter.h:356
virtual void adoptText(CharacterIterator *newText)
Set the iterator to analyze a new piece of text.
int32_t fLastRuleStatusIndex
Index of the Rule {tag} values for the most recent match.
Definition rbbi.h:104
UBool operator!=(const BreakIterator &that) const
Not-equal operator.
Definition rbbi.h:774
virtual int32_t following(int32_t offset)
Sets the iterator to refer to the first boundary position following the specified position.
virtual int32_t next(int32_t n)
Advances the iterator either forward or backward the specified number of steps.
virtual const uint8_t * getBinaryRules(uint32_t &length)
Return the binary form of compiled break rules, which can then be used to create a new break iterator...
RuleBasedBreakIterator & operator=(const RuleBasedBreakIterator &that)
Assignment operator.
RuleBasedBreakIterator(const uint8_t *compiledRules, uint32_t ruleLength, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
virtual int32_t next(void)
Advances the iterator to the next boundary position.
RuleBasedBreakIterator(UDataMemory *image, UErrorCode &status)
This constructor uses the udata interface to create a BreakIterator whose internal tables live in a m...
RuleBasedBreakIterator(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status)
Constructor from a flattened set of RBBI data in memory which need not be malloced (e....
virtual UBool operator==(const BreakIterator &that) const
Equality operator.
virtual int32_t current(void) const
Returns the current iteration position.
virtual void setText(const UnicodeString &newText)
Set the iterator to analyze a new piece of text.
UText * fText
The UText through which this BreakIterator accesses the text.
Definition rbbi.h:72
virtual int32_t first(void)
Sets the current iteration position to the beginning of the text, position zero.
virtual const UnicodeString & getRules(void) const
Returns the description used to create this iterator.
virtual void reset(void)
Dumps caches and performs other actions associated with a complete change in text or iteration positi...
int32_t * fCachedBreakPositions
When a range of characters is divided up using the dictionary, the break positions that are discovere...
Definition rbbi.h:128
friend class BreakIterator
Definition rbbi.h:212
int32_t fPositionInCache
If fCachedBreakPositions is not null, this indicates which item in the cache the current iteration po...
Definition rbbi.h:141
UnhandledEngine * fUnhandledBreakEngine
If present, the special LanguageBreakEngine used for handling characters that are in the dictionary s...
Definition rbbi.h:159
UBool fLastStatusIndexValid
Rule tag value valid flag.
Definition rbbi.h:112
uint32_t fDictionaryCharCount
Counter for the number of characters encountered with the "dictionary" flag set.
Definition rbbi.h:119
virtual UText * getUText(UText *fillIn, UErrorCode &status) const
Get a UText for the text being analyzed.
virtual BreakIterator * clone() const
Returns a newly-constructed RuleBasedBreakIterator with the same behavior, and iterating over the sam...
virtual CharacterIterator & getText(void) const
int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse)
This is the function that actually implements dictionary-based breaking.
virtual UClassID getDynamicClassID(void) const
Returns a unique class ID POLYMORPHICALLY.
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status)
Get the status (tag) values from the break rule(s) that determined the most recently returned break p...
RuleBasedBreakIterator(const UnicodeString &rules, UParseError &parseError, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
virtual int32_t previous(void)
Moves the iterator backwards, to the last boundary preceding this one.
EDontAdopt
Constant to be used in the constructor RuleBasedBreakIterator(RBBIDataHeader*, EDontAdopt,...
Definition rbbi.h:182
RBBIDataWrapper * fData
The rule data for this BreakIterator instance.
Definition rbbi.h:99
RuleBasedBreakIterator(RBBIDataHeader *data, UErrorCode &status)
Constructor from a flattened set of RBBI data in malloced memory.
StringCharacterIterator * fSCharIter
When the input text is provided by a UnicodeString, this will point to a characterIterator that wraps...
Definition rbbi.h:86
virtual int32_t last(void)
Sets the current iteration position to the end of the text.
RuleBasedBreakIterator(const RuleBasedBreakIterator &that)
Copy constructor.
RuleBasedBreakIterator()
Default constructor.
virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)
Deprecated functionality.
virtual RuleBasedBreakIterator & refreshInputText(UText *input, UErrorCode &status)
Set the subject text string upon which the break iterator is operating without changing any other asp...
virtual UBool isBoundary(int32_t offset)
Returns true if the specified position is a boundary position.
UCharCharacterIterator * fDCharIter
When the input text is provided by a UText, this dummy CharacterIterator over an empty string will be...
Definition rbbi.h:93
virtual int32_t hashCode(void) const
Compute a hash code for this BreakIterator.
void init()
Common initialization function, used by constructors and bufferClone.
static UClassID getStaticClassID(void)
Returns the class ID for this class.
virtual void setBreakType(int32_t type)
Set the type of the break iterator.
CharacterIterator * fCharIter
A character iterator that refers to the same text as the UText, above.
Definition rbbi.h:79
int32_t fNumCachedBreakPositions
The number of elements in fCachedBreakPositions.
Definition rbbi.h:134
virtual int32_t getRuleStatus() const
Return the status tag from the break rule that determined the most recently returned break position.
virtual void setText(UText *text, UErrorCode &status)
Reset the break iterator to operate over the text represented by the UText.
int32_t fBreakType
The type of the break iterator, or -1 if it has not been set.
Definition rbbi.h:166
virtual int32_t preceding(int32_t offset)
Sets the iterator to refer to the last boundary position before the specified position.
UStack * fLanguageBreakEngines
If present, UStack of LanguageBreakEngine objects that might handle dictionary characters.
Definition rbbi.h:150
virtual ~RuleBasedBreakIterator()
Destructor.
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points...
Definition schriter.h:43
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points...
Definition uchriter.h:33
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition unistr.h:294
C API: Parse Error Information.
C++ API: String Character Iterator.
A UParseError struct is used to return detailed information about parsing errors.
Definition parseerr.h:56
UText struct.
Definition utext.h:1343
C++ API: UChar Character Iterator.
C API: Data loading interface.
struct UDataMemory UDataMemory
Forward declaration of the data memory type.
Definition udata.h:156
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:332
int8_t UBool
The ICU boolean type.
Definition umachine.h:234
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:91
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition utypes.h:476
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition utypes.h:357
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition uversion.h:129