ICU 57.1
caniter.h
Go to the documentation of this file.
1/*
2 *******************************************************************************
3 * Copyright (C) 1996-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 */
7
8#ifndef CANITER_H
9#define CANITER_H
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_NORMALIZATION
14
15#include "unicode/uobject.h"
16#include "unicode/unistr.h"
17
27#ifndef CANITER_SKIP_ZEROES
28#define CANITER_SKIP_ZEROES TRUE
29#endif
30
32
33class Hashtable;
34class Normalizer2;
35class Normalizer2Impl;
36
// CanonicalIterator: iterates over the strings that are canonically
// equivalent to a source string (the source is supplied via setSource()).
// NOTE(review): this rendered listing omits several members that the page's
// member index documents -- the (source, status) constructor, the virtual
// destructor, next(), getSource() and getDynamicClassID(). Consult the
// original header before relying on this listing as the full interface.
72class U_COMMON_API CanonicalIterator U_FINAL : public UObject {
73public:
81
87
94
 // Resets the iterator so that one can start again from the beginning.
99 void reset();
100
109
 // Sets a new source for this iterator. `status` is the ICU error-code
 // in/out parameter (UErrorCode replaces exception handling in ICU).
117 void setSource(const UnicodeString &newSource, UErrorCode &status);
118
119#ifndef U_HIDE_INTERNAL_API
 // Internal API (hidden when U_HIDE_INTERNAL_API is defined).
 // Dumb recursive implementation of permutation.
 // NOTE(review): presumably the permutations of `source` are collected into
 // `result`, and `skipZeros` relates to CANITER_SKIP_ZEROES -- confirm
 // against the implementation.
129 static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
130#endif /* U_HIDE_INTERNAL_API */
131
 // ICU "poor man's RTTI": returns a UClassID for this class.
137 static UClassID U_EXPORT2 getStaticClassID();
138
145
146private:
147 // ===================== PRIVATES ==============================
148 // private default constructor
150
151
157
 // Copy assignment is declared in the private section, so it is not
 // available to clients of the class.
162 CanonicalIterator& operator=(const CanonicalIterator& other);
163
164 // fields
 // The string being iterated over.
 // NOTE(review): getSource() is documented as returning the NFD form, so
 // this is presumably stored in NFD -- confirm.
165 UnicodeString source;
 // NOTE(review): presumably set once iteration is exhausted -- confirm.
166 UBool done;
167
168 // 2 dimensional array holds the pieces of the string with
169 // their different canonically equivalent representations
170 UnicodeString **pieces;
 // NOTE(review): presumably the number of rows in `pieces` and the length
 // of each row, respectively -- confirm against the implementation.
171 int32_t pieces_length;
172 int32_t *pieces_lengths;
173
174 // current is used in iterating to combine pieces
175 int32_t *current;
176 int32_t current_length;
177
178 // transient fields
179 UnicodeString buffer;
180
 // References to the normalization objects used by the iterator.
 // NOTE(review): names suggest the NFD normalizer and the NFC
 // implementation data -- confirm where they are bound.
181 const Normalizer2 &nfd;
182 const Normalizer2Impl &nfcImpl;
183
184 // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
 // NOTE(review): `result_len` is taken by non-const reference, presumably to
 // report the length of the returned array -- confirm, including ownership.
185 UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)
186
187 //Set getEquivalents2(String segment);
 // Variant of getEquivalents working on a raw UChar buffer of length segLen.
 // NOTE(review): presumably fills and returns `fillinResult` -- confirm.
188 Hashtable *getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status);
189 //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);
190
196 //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
 // NOTE(review): the original doc comment was stripped from this listing;
 // presumably works with code point `comp` and `segment` starting at
 // `segmentPos`, returning results via `fillinResult` -- confirm.
197 Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
198 //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
199
 // NOTE(review): presumably releases the `pieces` storage; the destructor
 // is documented as "Cleans pieces" -- confirm.
200 void cleanPieces();
201
202};
203
205
206#endif /* #if !UCONFIG_NO_NORMALIZATION */
207
208#endif
This class allows one to iterate through all the strings that are canonically equivalent to a given string.
Definition: caniter.h:72
UnicodeString next()
Get the next canonically equivalent string.
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
void reset()
Resets the iterator so that one can start again from the beginning.
CanonicalIterator(const UnicodeString &source, UErrorCode &status)
Construct a CanonicalIterator object.
void setSource(const UnicodeString &newSource, UErrorCode &status)
Set a new source for this iterator.
UnicodeString getSource()
Gets the NFD form of the current source we are iterating over.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
static void permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status)
Dumb recursive implementation of permutation.
virtual ~CanonicalIterator()
Destructor. Cleans pieces.
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition: normalizer2.h:78
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:221
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:294
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:332
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:312
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:91
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:476
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:357
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129