ICU 57.1
ucnv.h
Go to the documentation of this file.
1/*
2**********************************************************************
3* Copyright (C) 1999-2014, International Business Machines
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6 * ucnv.h:
7 * External APIs for the ICU's codeset conversion library
8 * Bertrand A. Damiba
9 *
10 * Modification History:
11 *
12 * Date Name Description
13 * 04/04/99 helena Fixed internal header inclusion.
14 * 05/11/00 helena Added setFallback and usesFallback APIs.
15 * 06/29/2000 helena Major rewrite of the callback APIs.
16 * 12/07/2000 srl Update of documentation
17 */
18
47#ifndef UCNV_H
48#define UCNV_H
49
50#include "unicode/ucnv_err.h"
51#include "unicode/uenum.h"
53
54#ifndef __USET_H__
55
65struct USet;
67typedef struct USet USet;
68
69#endif
70
71#if !UCONFIG_NO_CONVERSION
72
74
76#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
78#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
79
81#define UCNV_SI 0x0F
83#define UCNV_SO 0x0E
84
168
178typedef enum {
179 UCNV_UNKNOWN = -1,
180 UCNV_IBM = 0
182
199 const void* context,
201 const char *codeUnits,
202 int32_t length,
205
222 const void* context,
224 const UChar* codeUnits,
225 int32_t length,
229
231
237#define UCNV_OPTION_SEP_CHAR ','
238
244#define UCNV_OPTION_SEP_STRING ","
245
251#define UCNV_VALUE_SEP_CHAR '='
252
258#define UCNV_VALUE_SEP_STRING "="
259
268#define UCNV_LOCALE_OPTION_STRING ",locale="
269
281#define UCNV_VERSION_OPTION_STRING ",version="
282
293#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl"
294
311ucnv_compareNames(const char *name1, const char *name2);
312
313
366
367
395ucnv_openU(const UChar *name,
396 UErrorCode *err);
397
465 UErrorCode * err);
466
499
541 void *stackBuffer,
542 int32_t *pBufferSize,
544
545#ifndef U_HIDE_DEPRECATED_API
546
553#define U_CNV_SAFECLONE_BUFFERSIZE 1024
554
555#endif /* U_HIDE_DEPRECATED_API */
556
570
571#if U_SHOW_CPLUSPLUS_API
572
574
585
587
588#endif
589
609 char *subChars,
610 int8_t *len,
611 UErrorCode *err);
612
634 const char *subChars,
635 int8_t len,
636 UErrorCode *err);
637
667 const UChar *s,
668 int32_t length,
669 UErrorCode *err);
670
686 char *errBytes,
687 int8_t *len,
688 UErrorCode *err);
689
706 int8_t *len,
707 UErrorCode *err);
708
718
729
740
791U_STABLE int8_t U_EXPORT2
793
813#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \
814 (((int32_t)(length)+10)*(int32_t)(maxCharSize))
815
824U_STABLE int8_t U_EXPORT2
826
841U_STABLE int32_t U_EXPORT2
843 const char *displayLocale,
845 int32_t displayNameCapacity,
846 UErrorCode *err);
847
858U_STABLE const char * U_EXPORT2
860
884U_STABLE int32_t U_EXPORT2
885ucnv_getCCSID(const UConverter *converter,
886 UErrorCode *err);
887
900 UErrorCode *err);
901
911ucnv_getType(const UConverter * converter);
912
930 UBool starters[256],
931 UErrorCode* err);
932
933
947
948
999
1014 const void **context);
1015
1030 const void **context);
1031
1050 const void* newContext,
1052 const void** oldContext,
1053 UErrorCode * err);
1054
1073 const void *newContext,
1075 const void **oldContext,
1076 UErrorCode * err);
1077
1138 char **target,
1139 const char *targetLimit,
1140 const UChar ** source,
1141 const UChar * sourceLimit,
1142 int32_t* offsets,
1143 UBool flush,
1144 UErrorCode * err);
1145
1207 UChar **target,
1208 const UChar *targetLimit,
1209 const char **source,
1210 const char *sourceLimit,
1211 int32_t *offsets,
1212 UBool flush,
1213 UErrorCode *err);
1214
1242U_STABLE int32_t U_EXPORT2
1244 char *dest, int32_t destCapacity,
1245 const UChar *src, int32_t srcLength,
1247
1274U_STABLE int32_t U_EXPORT2
1276 UChar *dest, int32_t destCapacity,
1277 const char *src, int32_t srcLength,
1279
1352 const char **source,
1353 const char * sourceLimit,
1354 UErrorCode * err);
1355
1496 char **target, const char *targetLimit,
1497 const char **source, const char *sourceLimit,
1500 UBool reset, UBool flush,
1502
1558U_STABLE int32_t U_EXPORT2
1560 const char *fromConverterName,
1561 char *target,
1562 int32_t targetCapacity,
1563 const char *source,
1564 int32_t sourceLength,
1566
1612U_STABLE int32_t U_EXPORT2
1614 UConverter *cnv,
1615 char *target, int32_t targetCapacity,
1616 const char *source, int32_t sourceLength,
1618
1664U_STABLE int32_t U_EXPORT2
1667 char *target, int32_t targetCapacity,
1668 const char *source, int32_t sourceLength,
1670
1678U_STABLE int32_t U_EXPORT2
1680
1688U_STABLE int32_t U_EXPORT2
1690
1701U_STABLE const char* U_EXPORT2
1703
1718
1729U_STABLE uint16_t U_EXPORT2
1731
1744U_STABLE const char * U_EXPORT2
1745ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
1746
1762
1788 const char *standard,
1790
1796U_STABLE uint16_t U_EXPORT2
1798
1806U_STABLE const char * U_EXPORT2
1808
1828U_STABLE const char * U_EXPORT2
1829ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
1830
1850U_STABLE const char * U_EXPORT2
1852
1867U_STABLE const char * U_EXPORT2
1869
1870#ifndef U_HIDE_SYSTEM_API
1888ucnv_setDefaultName(const char *name);
1889#endif /* U_HIDE_SYSTEM_API */
1890
1910
1921
1939
1951
1981U_STABLE const char* U_EXPORT2
1983 int32_t sourceLength,
1984 int32_t *signatureLength,
1986
1998U_STABLE int32_t U_EXPORT2
2000
2012U_STABLE int32_t U_EXPORT2
2014
2032
2033#endif
2034
2035#endif
2036/*_UCNV*/
"Smart pointer" base class; do not use directly: use LocalPointer etc.
"Smart pointer" class, closes a UConverter via ucnv_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
The structure for the fromUnicode callback function parameter.
Definition ucnv_err.h:192
The structure for the toUnicode callback function parameter.
Definition ucnv_err.h:208
UBool ucnv_usesFallback(const UConverter *cnv)
Determines if the converter uses fallback mappings or not.
void(* UConverterToUCallback)(const void *context, UConverterToUnicodeArgs *args, const char *codeUnits, int32_t length, UConverterCallbackReason reason, UErrorCode *pErrorCode)
Function pointer for error callback in the codepage to unicode direction.
Definition ucnv.h:198
int32_t ucnv_getCCSID(const UConverter *converter, UErrorCode *err)
Gets a codepage number associated with the converter.
UConverter * ucnv_openCCSID(int32_t codepage, UConverterPlatform platform, UErrorCode *err)
Creates a UConverter object from a CCSID number and platform pair.
int8_t ucnv_getMaxCharSize(const UConverter *converter)
Returns the maximum number of bytes that are output per UChar in conversion from Unicode using this c...
void ucnv_setFallback(UConverter *cnv, UBool usesFallback)
Sets the converter to use fallback mappings or not.
void ucnv_getSubstChars(const UConverter *converter, char *subChars, int8_t *len, UErrorCode *err)
Fills in the output parameter, subChars, with the substitution characters as multiple bytes.
UConverter * ucnv_safeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
Thread safe converter cloning operation.
int8_t ucnv_getMinCharSize(const UConverter *converter)
Returns the minimum byte length (per codepoint) for characters in this codepage.
int32_t ucnv_toUChars(UConverter *cnv, UChar *dest, int32_t destCapacity, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert the codepage string into a Unicode string using an existing UConverter.
const char * ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode)
Gives the name of the standard at given index of standard list.
void ucnv_close(UConverter *converter)
Deletes the unicode converter and releases resources associated with just this instance.
UEnumeration * ucnv_openStandardNames(const char *convName, const char *standard, UErrorCode *pErrorCode)
Return a new UEnumeration object for enumerating all the alias names for a given converter that are r...
void ucnv_setDefaultName(const char *name)
This function is not thread safe.
int32_t ucnv_toAlgorithmic(UConverterType algorithmicType, UConverter *cnv, char *target, int32_t targetCapacity, const char *source, int32_t sourceLength, UErrorCode *pErrorCode)
Convert from one external charset to another.
int32_t ucnv_fromUCountPending(const UConverter *cnv, UErrorCode *status)
Returns the number of UChars held in the converter's internal state because more input is needed for ...
UConverter * ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err)
void(* UConverterFromUCallback)(const void *context, UConverterFromUnicodeArgs *args, const UChar *codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode *pErrorCode)
Function pointer for error callback in the unicode to codepage direction.
Definition ucnv.h:221
void ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen)
Fixes the backslash character mismapping.
void ucnv_getFromUCallBack(const UConverter *converter, UConverterFromUCallback *action, const void **context)
Gets the current callback function used by the converter when illegal or invalid Unicode sequence is ...
int32_t ucnv_convert(const char *toConverterName, const char *fromConverterName, char *target, int32_t targetCapacity, const char *source, int32_t sourceLength, UErrorCode *pErrorCode)
Convert from one external charset to another.
int32_t ucnv_fromUChars(UConverter *cnv, char *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert the Unicode string into a codepage string using an existing UConverter.
void ucnv_getInvalidUChars(const UConverter *converter, UChar *errUChars, int8_t *len, UErrorCode *err)
Fills in the output parameter, errUChars, with the error characters from the last failing conversion.
int32_t ucnv_getDisplayName(const UConverter *converter, const char *displayLocale, UChar *displayName, int32_t displayNameCapacity, UErrorCode *err)
Returns the display name of the converter passed in based on the Locale passed in.
const char * ucnv_getName(const UConverter *converter, UErrorCode *err)
Gets the internal, canonical name of the converter (zero-terminated).
UConverterUnicodeSet
Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet().
Definition ucnv.h:939
@ UCNV_SET_COUNT
Number of UConverterUnicodeSet selectors.
Definition ucnv.h:945
@ UCNV_ROUNDTRIP_AND_FALLBACK_SET
Select the set of Unicode code points with roundtrip or fallback mappings.
Definition ucnv.h:943
@ UCNV_ROUNDTRIP_SET
Select the set of roundtrippable Unicode code points.
Definition ucnv.h:941
void ucnv_fromUnicode(UConverter *converter, char **target, const char *targetLimit, const UChar **source, const UChar *sourceLimit, int32_t *offsets, UBool flush, UErrorCode *err)
Converts an array of unicode characters to an array of codepage characters.
int32_t ucnv_countAvailable(void)
Returns the number of available converters, as per the alias file.
void ucnv_setFromUCallBack(UConverter *converter, UConverterFromUCallback newAction, const void *newContext, UConverterFromUCallback *oldAction, const void **oldContext, UErrorCode *err)
Changes the current callback function used by the converter when an illegal or invalid sequence is fo...
UEnumeration * ucnv_openAllNames(UErrorCode *pErrorCode)
Returns a UEnumeration to enumerate all of the canonical converter names, as per the alias file,...
int32_t ucnv_fromAlgorithmic(UConverter *cnv, UConverterType algorithmicType, char *target, int32_t targetCapacity, const char *source, int32_t sourceLength, UErrorCode *pErrorCode)
Convert from one external charset to another.
int32_t ucnv_toUCountPending(const UConverter *cnv, UErrorCode *status)
Returns the number of chars held in the converter's internal state because more input is needed for c...
const char * ucnv_detectUnicodeSignature(const char *source, int32_t sourceLength, int32_t *signatureLength, UErrorCode *pErrorCode)
Detects Unicode signature byte sequences at the start of the byte stream and returns the charset name...
UConverter * ucnv_openU(const UChar *name, UErrorCode *err)
Creates a Unicode converter with the names specified as unicode string.
const char * ucnv_getAvailableName(int32_t n)
Gets the canonical converter name of the specified converter from a list of all available converters ...
int ucnv_compareNames(const char *name1, const char *name2)
Do a fuzzy compare of two converter/alias names.
uint16_t ucnv_countStandards(void)
Gives the number of standards associated with converter names.
UBool ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status)
Returns whether or not the charset of the converter has a fixed number of bytes per charset character...
void ucnv_resetToUnicode(UConverter *converter)
Resets the to-Unicode part of a converter state to the default state.
UConverter * ucnv_open(const char *converterName, UErrorCode *err)
Creates a UConverter object with the name of a coded character set specified as a C string.
UConverterType ucnv_getType(const UConverter *converter)
Gets the type of the converter, e.g. UCNV_SBCS, UCNV_MBCS or UCNV_UTF8.
UConverterPlatform ucnv_getPlatform(const UConverter *converter, UErrorCode *err)
Gets a codepage platform associated with the converter.
void ucnv_getStarters(const UConverter *converter, UBool starters[256], UErrorCode *err)
Gets the "starter" (lead) bytes for converters of type MBCS.
UChar32 ucnv_getNextUChar(UConverter *converter, const char **source, const char *sourceLimit, UErrorCode *err)
Convert a codepage buffer into Unicode one character at a time.
int32_t ucnv_flushCache(void)
Frees up memory occupied by unused, cached converter shared data.
void ucnv_setToUCallBack(UConverter *converter, UConverterToUCallback newAction, const void *newContext, UConverterToUCallback *oldAction, const void **oldContext, UErrorCode *err)
Changes the callback function used by the converter when an illegal or invalid sequence is found.
uint16_t ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
Gives the number of aliases for a given converter or alias name.
const char * ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
Gives the name of the alias at given index of alias list.
UConverterPlatform
Enum for specifying which platform a converter ID refers to.
Definition ucnv.h:178
void ucnv_resetFromUnicode(UConverter *converter)
Resets the from-Unicode part of a converter state to the default state.
const char * ucnv_getDefaultName(void)
Returns the current default converter name.
void ucnv_setSubstChars(UConverter *converter, const char *subChars, int8_t len, UErrorCode *err)
Sets the substitution chars when converting from unicode to a codepage.
UBool ucnv_isAmbiguous(const UConverter *cnv)
Determines if the converter contains ambiguous mappings of the same character or not.
const char * ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode)
Returns a standard name for a given converter name.
void ucnv_reset(UConverter *converter)
Resets the state of a converter to the default state.
UConverterType
Enum for specifying basic types of converters.
Definition ucnv.h:90
@ UCNV_HZ
Definition ucnv.h:143
@ UCNV_LMBCS_11
Definition ucnv.h:131
@ UCNV_ISO_2022
Definition ucnv.h:114
@ UCNV_LATIN_1
Definition ucnv.h:100
@ UCNV_COMPOUND_TEXT
Definition ucnv.h:163
@ UCNV_LMBCS_19
Definition ucnv.h:139
@ UCNV_UTF32_BigEndian
Definition ucnv.h:108
@ UCNV_LMBCS_17
Definition ucnv.h:135
@ UCNV_LMBCS_6
Definition ucnv.h:127
@ UCNV_UTF32
Definition ucnv.h:157
@ UCNV_ISCII
Definition ucnv.h:147
@ UCNV_LMBCS_3
Definition ucnv.h:121
@ UCNV_IMAP_MAILBOX
Definition ucnv.h:161
@ UCNV_UTF8
Definition ucnv.h:102
@ UCNV_SBCS
Definition ucnv.h:94
@ UCNV_UTF16_LittleEndian
Definition ucnv.h:106
@ UCNV_UNSUPPORTED_CONVERTER
Definition ucnv.h:92
@ UCNV_LMBCS_16
Definition ucnv.h:133
@ UCNV_UTF32_LittleEndian
Definition ucnv.h:110
@ UCNV_LMBCS_8
Definition ucnv.h:129
@ UCNV_US_ASCII
Definition ucnv.h:149
@ UCNV_LMBCS_2
Definition ucnv.h:119
@ UCNV_UTF16
Definition ucnv.h:155
@ UCNV_UTF16_BigEndian
Definition ucnv.h:104
@ UCNV_LMBCS_5
Definition ucnv.h:125
@ UCNV_EBCDIC_STATEFUL
Definition ucnv.h:112
@ UCNV_MBCS
Definition ucnv.h:98
@ UCNV_LMBCS_4
Definition ucnv.h:123
@ UCNV_SCSU
Definition ucnv.h:145
@ UCNV_UTF7
Definition ucnv.h:151
@ UCNV_LMBCS_18
Definition ucnv.h:137
@ UCNV_BOCU1
Definition ucnv.h:153
@ UCNV_LMBCS_LAST
Definition ucnv.h:141
@ UCNV_LMBCS_1
Definition ucnv.h:117
@ UCNV_DBCS
Definition ucnv.h:96
@ UCNV_CESU8
Definition ucnv.h:159
struct USet USet
Definition ucnv.h:67
void ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
Fills in the list of alias names for the given alias.
void ucnv_getInvalidChars(const UConverter *converter, char *errBytes, int8_t *len, UErrorCode *err)
Fills in the output parameter, errBytes, with the error characters from the last failing conversion.
void ucnv_getUnicodeSet(const UConverter *cnv, USet *setFillIn, UConverterUnicodeSet whichSet, UErrorCode *pErrorCode)
Returns the set of Unicode code points that can be converted by an ICU converter.
const char * ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode)
This function will return the internal canonical converter name of the tagged alias.
void ucnv_getToUCallBack(const UConverter *converter, UConverterToUCallback *action, const void **context)
Gets the current callback function used by the converter when an illegal or invalid codepage sequence ...
void ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, char **target, const char *targetLimit, const char **source, const char *sourceLimit, UChar *pivotStart, UChar **pivotSource, UChar **pivotTarget, const UChar *pivotLimit, UBool reset, UBool flush, UErrorCode *pErrorCode)
Convert from one external charset to another using two existing UConverters.
void ucnv_setSubstString(UConverter *cnv, const UChar *s, int32_t length, UErrorCode *err)
Set a substitution string for converting from Unicode to a charset.
void ucnv_toUnicode(UConverter *converter, UChar **target, const UChar *targetLimit, const char **source, const char *sourceLimit, int32_t *offsets, UBool flush, UErrorCode *err)
Converts a buffer of codepage bytes into an array of unicode UChars characters.
C UConverter predefined error callbacks.
struct UConverter UConverter
Definition ucnv_err.h:94
UConverterCallbackReason
The process condition code to be used with the callbacks.
Definition ucnv_err.h:155
C API: String Enumeration.
struct UEnumeration UEnumeration
structure representing an enumeration object instance
Definition uenum.h:39
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition umachine.h:83
int8_t UBool
The ICU boolean type.
Definition umachine.h:234
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition umachine.h:312
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition umachine.h:82
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition umachine.h:109
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition utypes.h:476
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition uversion.h:129