ICU 57.1
ucsdet.h
Go to the documentation of this file.
1/*
2 **********************************************************************
3 * Copyright (C) 2005-2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: ucsdet.h
7 * encoding: US-ASCII
8 * indentation:4
9 *
10 * created on: 2005Aug04
11 * created by: Andy Heninger
12 *
13 * ICU Character Set Detection, API for C
14 *
15 * Draft version 18 Oct 2005
16 *
17 */
18
19#ifndef __UCSDET_H
20#define __UCSDET_H
21
22#include "unicode/utypes.h"
23
24#if !UCONFIG_NO_CONVERSION
25
27#include "unicode/uenum.h"
28
47
48
49struct UCharsetDetector;
55
56struct UCharsetMatch;
63
72U_STABLE UCharsetDetector * U_EXPORT2
74
84U_STABLE void U_EXPORT2
86
87#if U_SHOW_CPLUSPLUS_API
88
90
101
103
104#endif
105
121U_STABLE void U_EXPORT2
122ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status);
123
124
143U_STABLE void U_EXPORT2
144ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status);
145
146
172U_STABLE const UCharsetMatch * U_EXPORT2
174
175
206U_STABLE const UCharsetMatch ** U_EXPORT2
207ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status);
208
209
210
226U_STABLE const char * U_EXPORT2
228
252U_STABLE int32_t U_EXPORT2
254
284U_STABLE const char * U_EXPORT2
286
287
310U_STABLE int32_t U_EXPORT2
312 UChar *buf, int32_t cap, UErrorCode *status);
313
314
315
344U_STABLE UEnumeration * U_EXPORT2
346
357
358U_STABLE UBool U_EXPORT2
360
361
373U_STABLE UBool U_EXPORT2
375
376#ifndef U_HIDE_INTERNAL_API
390U_INTERNAL UEnumeration * U_EXPORT2
392
406U_INTERNAL void U_EXPORT2
407ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status);
408#endif /* U_HIDE_INTERNAL_API */
409
410#endif
411#endif /* __UCSDET_H */
412
413
"Smart pointer" class, closes a UCharsetDetector via ucsdet_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
UBool ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd)
Test whether input filtering is enabled for this charset detector.
const char * ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status)
Get the name of the charset represented by a UCharsetMatch.
const UCharsetMatch * ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status)
Return the charset that best matches the supplied input data.
int32_t ucsdet_getUChars(const UCharsetMatch *ucsm, UChar *buf, int32_t cap, UErrorCode *status)
Get the entire input text as a UChar string, placing it into a caller-supplied buffer.
UEnumeration * ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status)
Get an iterator over the set of all detectable charsets - over the charsets that are known to the charset detection service.
const char * ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status)
Get the RFC 3066 code for the language of the input data.
struct UCharsetMatch UCharsetMatch
Opaque structure representing a match that was identified from a charset detection operation.
Definition ucsdet.h:62
UEnumeration * ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status)
Get an iterator over the set of detectable charsets - over the charsets that are enabled by the specified charset detector.
void ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status)
Enable or disable individual charset encoding.
void ucsdet_close(UCharsetDetector *ucsd)
Close a charset detector.
struct UCharsetDetector UCharsetDetector
Structure representing a charset detector.
Definition ucsdet.h:54
void ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status)
Set the declared encoding for charset detection.
int32_t ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status)
Get a confidence number for the quality of the match of the byte data with the charset.
const UCharsetMatch ** ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status)
Find all charset matches that appear to be consistent with the input, returning an array of results.
void ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status)
Set the input byte data whose charset is to be detected.
UCharsetDetector * ucsdet_open(UErrorCode *status)
Open a charset detector.
UBool ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter)
Enable filtering of input text.
C API: String Enumeration.
struct UEnumeration UEnumeration
Structure representing an enumeration object instance.
Definition uenum.h:39
#define U_INTERNAL
This is used to declare a function as an internal ICU C API.
Definition umachine.h:117
int8_t UBool
The ICU boolean type.
Definition umachine.h:234
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition umachine.h:312
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition umachine.h:109
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition utypes.h:476
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition uversion.h:129