ICU 57.1 57.1
translit.h
Go to the documentation of this file.
1/*
2**********************************************************************
3* Copyright (C) 1999-2014, International Business Machines
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* Date Name Description
7* 11/17/99 aliu Creation.
8**********************************************************************
9*/
10#ifndef TRANSLIT_H
11#define TRANSLIT_H
12
13#include "unicode/utypes.h"
14
20#if !UCONFIG_NO_TRANSLITERATION
21
22#include "unicode/uobject.h"
23#include "unicode/unistr.h"
24#include "unicode/parseerr.h"
25#include "unicode/utrans.h" // UTransPosition, UTransDirection
26#include "unicode/strenum.h"
27
29
30class UnicodeFilter;
31class UnicodeSet;
32class CompoundTransliterator;
33class TransliteratorParser;
34class NormalizationTransliterator;
35class TransliteratorIDParser;
36
242
243private:
244
248 UnicodeString ID;
249
256 UnicodeFilter* filter;
257
258 int32_t maximumContextLength;
259
260 public:
261
267 union Token {
272 int32_t integer;
277 void* pointer;
278 };
279
280#ifndef U_HIDE_INTERNAL_API
286 inline static Token integerToken(int32_t);
287
293 inline static Token pointerToken(void*);
294#endif /* U_HIDE_INTERNAL_API */
295
311 typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context);
312
313protected:
314
324 Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
325
331
337
350 const UnicodeString* canon);
351
352 friend class TransliteratorParser; // for parseID()
353 friend class TransliteratorIDParser; // for createBasicInstance()
354 friend class TransliteratorAlias; // for setID()
355
356public:
357
363
378 virtual Transliterator* clone() const;
379
395 virtual int32_t transliterate(Replaceable& text,
396 int32_t start, int32_t limit) const;
397
403 virtual void transliterate(Replaceable& text) const;
404
469 virtual void transliterate(Replaceable& text, UTransPosition& index,
470 const UnicodeString& insertion,
471 UErrorCode& status) const;
472
488 virtual void transliterate(Replaceable& text, UTransPosition& index,
489 UChar32 insertion,
490 UErrorCode& status) const;
491
505 virtual void transliterate(Replaceable& text, UTransPosition& index,
506 UErrorCode& status) const;
507
520 UTransPosition& index) const;
521
522private:
523
539 void _transliterate(Replaceable& text,
540 UTransPosition& index,
541 const UnicodeString* insertion,
542 UErrorCode &status) const;
543
544protected:
545
626 UTransPosition& pos,
627 UBool incremental) const = 0;
628
629public:
642 UTransPosition& index,
643 UBool incremental) const;
644
645private:
646
674 virtual void filteredTransliterate(Replaceable& text,
675 UTransPosition& index,
676 UBool incremental,
677 UBool rollback) const;
678
679public:
680
694 int32_t getMaximumContextLength(void) const;
695
696protected:
697
704 void setMaximumContextLength(int32_t maxContextLength);
705
706public:
707
718 virtual const UnicodeString& getID(void) const;
719
729 static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
730 UnicodeString& result);
731
753 static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
754 const Locale& inLocale,
755 UnicodeString& result);
756
764 const UnicodeFilter* getFilter(void) const;
765
776
787 void adoptFilter(UnicodeFilter* adoptedFilter);
788
809
826 static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
827 UTransDirection dir,
828 UParseError& parseError,
829 UErrorCode& status);
830
841 static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
842 UTransDirection dir,
843 UErrorCode& status);
844
860 static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
861 const UnicodeString& rules,
862 UTransDirection dir,
863 UParseError& parseError,
864 UErrorCode& status);
865
878 UBool escapeUnprintable) const;
879
892 int32_t countElements() const;
893
913 const Transliterator& getElement(int32_t index, UErrorCode& ec) const;
914
931
946 virtual void handleGetSourceSet(UnicodeSet& result) const;
947
961 virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
962
963public:
964
981 static void U_EXPORT2 registerFactory(const UnicodeString& id,
982 Factory factory,
983 Token context);
984
1006 static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);
1007
1022 static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,
1023 const UnicodeString& realID);
1024
1025protected:
1026
1027#ifndef U_HIDE_INTERNAL_API
1037 static void _registerFactory(const UnicodeString& id,
1038 Factory factory,
1039 Token context);
1040
1044 static void _registerInstance(Transliterator* adoptedObj);
1045
1049 static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);
1050
1084 static void _registerSpecialInverse(const UnicodeString& target,
1085 const UnicodeString& inverseTarget,
1086 UBool bidirectional);
1087#endif /* U_HIDE_INTERNAL_API */
1088
1089public:
1090
1108 static void U_EXPORT2 unregister(const UnicodeString& ID);
1109
1110public:
1111
1122
1128 static int32_t U_EXPORT2 countAvailableSources(void);
1129
1139 static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,
1140 UnicodeString& result);
1141
1150 static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);
1151
1163 static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,
1164 const UnicodeString& source,
1165 UnicodeString& result);
1166
1174 static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,
1175 const UnicodeString& target);
1176
1190 static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,
1191 const UnicodeString& source,
1192 const UnicodeString& target,
1193 UnicodeString& result);
1194
1195protected:
1196
1197#ifndef U_HIDE_INTERNAL_API
1202 static int32_t _countAvailableSources(void);
1203
1208 static UnicodeString& _getAvailableSource(int32_t index,
1209 UnicodeString& result);
1210
1215 static int32_t _countAvailableTargets(const UnicodeString& source);
1216
1221 static UnicodeString& _getAvailableTarget(int32_t index,
1222 const UnicodeString& source,
1223 UnicodeString& result);
1224
1229 static int32_t _countAvailableVariants(const UnicodeString& source,
1230 const UnicodeString& target);
1231
1237 const UnicodeString& source,
1238 const UnicodeString& target,
1239 UnicodeString& result);
1240#endif /* U_HIDE_INTERNAL_API */
1241
1242protected:
1243
1250 void setID(const UnicodeString& id);
1251
1252public:
1253
1264 static UClassID U_EXPORT2 getStaticClassID(void);
1265
1281 virtual UClassID getDynamicClassID(void) const = 0;
1282
1283private:
1284 static UBool initializeRegistry(UErrorCode &status);
1285
1286public:
1287#ifndef U_HIDE_OBSOLETE_API
1295 static int32_t U_EXPORT2 countAvailableIDs(void);
1296
1309 static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index);
1310#endif /* U_HIDE_OBSOLETE_API */
1311};
1312
1314 return maximumContextLength;
1315}
1316
// Stores a copy of `id` as this transliterator's ID.
1317inline void Transliterator::setID(const UnicodeString& id) {
1318 ID = id;
1319 // NUL-terminate the ID string, which is a non-aliased copy: append a NUL, then truncate by one so the logical length is unchanged (the NUL is presumably left behind in the buffer -- confirm against UnicodeString).
1320 ID.append((UChar)0);
1321 ID.truncate(ID.length()-1);
1322}
1323
1324#ifndef U_HIDE_INTERNAL_API
1326 Token t;
1327 t.integer = i;
1328 return t;
1329}
1330
1332 Token t;
1333 t.pointer = p;
1334 return t;
1335}
1336#endif /* U_HIDE_INTERNAL_API */
1337
1339
1340#endif /* #if !UCONFIG_NO_TRANSLITERATION */
1341
1342#endif
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:185
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:71
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:55
Transliterator is an abstract class that transliterates text from one format to another.
Definition: translit.h:241
static void registerFactory(const UnicodeString &id, Factory factory, Token context)
Registers a factory function that creates transliterators of a given ID.
static void registerAlias(const UnicodeString &aliasID, const UnicodeString &realID)
Registers an ID string as an alias of another ID string.
virtual UnicodeSet & getTargetSet(UnicodeSet &result) const
Returns the set of all characters that may be generated as replacement text by this transliterator.
virtual int32_t transliterate(Replaceable &text, int32_t start, int32_t limit) const
Transliterates a segment of a string, with optional filtering.
virtual void transliterate(Replaceable &text, UTransPosition &index, UErrorCode &status) const
Transliterates the portion of the text buffer that can be transliterated unambiguously.
static void _registerFactory(const UnicodeString &id, Factory factory, Token context)
static int32_t _countAvailableSources(void)
Non-mutexed internal method.
const Transliterator & getElement(int32_t index, UErrorCode &ec) const
Return an element that makes up this transliterator.
virtual void transliterate(Replaceable &text) const
Transliterates an entire string in place.
static int32_t _countAvailableTargets(const UnicodeString &source)
Non-mutexed internal method.
static Transliterator * createInstance(const UnicodeString &ID, UTransDirection dir, UParseError &parseError, UErrorCode &status)
Returns a Transliterator object given its ID.
virtual UnicodeString & toRules(UnicodeString &result, UBool escapeUnprintable) const
Create a rule string that can be passed to createFromRules() to recreate this transliterator.
void setMaximumContextLength(int32_t maxContextLength)
Method for subclasses to use to set the maximum context length.
int32_t countElements() const
Return the number of elements that make up this transliterator.
Transliterator *(* Factory)(const UnicodeString &ID, Token context)
A function that creates and returns a Transliterator.
Definition: translit.h:311
static Token integerToken(int32_t)
Return a token containing an integer.
Definition: translit.h:1325
UnicodeFilter * orphanFilter(void)
Returns the filter used by this transliterator, or NULL if this transliterator uses no filter.
static const UnicodeString & getAvailableID(int32_t index)
Return the index-th available ID.
virtual void transliterate(Replaceable &text, UTransPosition &index, const UnicodeString &insertion, UErrorCode &status) const
Transliterates the portion of the text buffer that can be transliterated unambiguously after new text ...
const UnicodeFilter * getFilter(void) const
Returns the filter used by this transliterator, or NULL if this transliterator uses no filter.
UnicodeSet & getSourceSet(UnicodeSet &result) const
Returns the set of all characters that may be modified in the input text by this Transliterator.
static UnicodeString & getAvailableSource(int32_t index, UnicodeString &result)
Return a registered source specifier.
static int32_t countAvailableIDs(void)
Return the number of IDs currently registered with the system.
static void _registerInstance(Transliterator *adoptedObj)
virtual Transliterator * clone() const
Implements Cloneable.
static Token pointerToken(void *)
Return a token containing a pointer.
Definition: translit.h:1331
Transliterator * createInverse(UErrorCode &status) const
Returns this transliterator's inverse.
int32_t getMaximumContextLength(void) const
Returns the length of the longest context required by this transliterator.
Definition: translit.h:1313
static UnicodeString & getDisplayName(const UnicodeString &ID, const Locale &inLocale, UnicodeString &result)
Returns a name for this transliterator that is appropriate for display to the user in the given local...
static UnicodeString & getAvailableVariant(int32_t index, const UnicodeString &source, const UnicodeString &target, UnicodeString &result)
Return a registered variant specifier for a given source-target pair.
static UnicodeString & getAvailableTarget(int32_t index, const UnicodeString &source, UnicodeString &result)
Return a registered target specifier for a given source.
static Transliterator * createInstance(const UnicodeString &ID, UTransDirection dir, UErrorCode &status)
Returns a Transliterator object given its ID.
static int32_t _countAvailableVariants(const UnicodeString &source, const UnicodeString &target)
Non-mutexed internal method.
static void unregister(const UnicodeString &ID)
Unregisters a transliterator or class.
static int32_t countAvailableVariants(const UnicodeString &source, const UnicodeString &target)
Return the number of registered variant specifiers for a given source-target pair.
static void registerInstance(Transliterator *adoptedObj)
Registers an instance obj of a subclass of Transliterator with the system.
static StringEnumeration * getAvailableIDs(UErrorCode &ec)
Return a StringEnumeration over the IDs available at the time of the call, including user-registered ...
static UClassID getStaticClassID(void)
Return the class ID for this class.
virtual void transliterate(Replaceable &text, UTransPosition &index, UChar32 insertion, UErrorCode &status) const
Transliterates the portion of the text buffer that can be transliterated unambiguously after a new cha...
static UnicodeString & _getAvailableTarget(int32_t index, const UnicodeString &source, UnicodeString &result)
Non-mutexed internal method.
virtual void handleGetSourceSet(UnicodeSet &result) const
Framework method that returns the set of all characters that may be modified in the input text by thi...
virtual void filteredTransliterate(Replaceable &text, UTransPosition &index, UBool incremental) const
Transliterate a substring of text, as specified by index, taking filters into account.
virtual ~Transliterator()
Destructor.
virtual void handleTransliterate(Replaceable &text, UTransPosition &pos, UBool incremental) const =0
Abstract method that concrete subclasses define to implement their transliteration algorithm.
void setID(const UnicodeString &id)
Set the ID of this transliterator.
Definition: translit.h:1317
static UnicodeString & _getAvailableVariant(int32_t index, const UnicodeString &source, const UnicodeString &target, UnicodeString &result)
Non-mutexed internal method.
static UnicodeString & _getAvailableSource(int32_t index, UnicodeString &result)
Non-mutexed internal method.
static int32_t countAvailableSources(void)
Return the number of registered source specifiers.
static void _registerAlias(const UnicodeString &aliasID, const UnicodeString &realID)
virtual void finishTransliteration(Replaceable &text, UTransPosition &index) const
Finishes any pending transliterations that were waiting for more characters.
Transliterator(const Transliterator &)
Copy constructor.
static UnicodeString & getDisplayName(const UnicodeString &ID, UnicodeString &result)
Returns a name for this transliterator that is appropriate for display to the user in the default loc...
virtual UClassID getDynamicClassID(void) const =0
Returns a unique class ID polymorphically.
Transliterator & operator=(const Transliterator &)
Assignment operator.
static Transliterator * createFromRules(const UnicodeString &ID, const UnicodeString &rules, UTransDirection dir, UParseError &parseError, UErrorCode &status)
Returns a Transliterator object constructed from the given rule string.
virtual const UnicodeString & getID(void) const
Returns a programmatic identifier for this transliterator.
void adoptFilter(UnicodeFilter *adoptedFilter)
Changes the filter used by this transliterator.
static void _registerSpecialInverse(const UnicodeString &target, const UnicodeString &inverseTarget, UBool bidirectional)
Register two targets as being inverses of one another.
static int32_t countAvailableTargets(const UnicodeString &source)
Return the number of registered target specifiers for a given source specifier.
static Transliterator * createBasicInstance(const UnicodeString &id, const UnicodeString *canon)
Create a transliterator from a basic ID.
Transliterator(const UnicodeString &ID, UnicodeFilter *adoptedFilter)
Default constructor.
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:221
UnicodeFilter defines a protocol for selecting a subset of the full range (U+0000 to U+10FFFF) of Uni...
Definition: unifilt.h:59
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:276
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:294
int32_t length(void) const
Return the length of the UnicodeString object.
Definition: unistr.h:3794
UBool truncate(int32_t targetLength)
Truncate this UnicodeString to the targetLength.
Definition: unistr.h:4634
UnicodeString & append(const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Append the characters in srcText in the range [srcStart, srcStart + srcLength) to the UnicodeString o...
Definition: unistr.h:4526
C API: Parse Error Information.
C++ API: String Enumeration.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
Position structure for utrans_transIncremental() incremental transliteration.
Definition: utrans.h:120
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:332
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:312
A context integer or pointer for a factory function, passed by value.
Definition: translit.h:267
void * pointer
This token, interpreted as a native pointer.
Definition: translit.h:277
int32_t integer
This token, interpreted as a 32-bit integer.
Definition: translit.h:272
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:91
C API: Transliterator.
UTransDirection
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules o...
Definition: utrans.h:78
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:476
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:358
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129