ICU 57.1
uspoof.h
Go to the documentation of this file.
1/*
2***************************************************************************
3* Copyright (C) 2008-2015, International Business Machines Corporation
4* and others. All Rights Reserved.
5***************************************************************************
6* file name: uspoof.h
7* encoding: US-ASCII
8* tab size: 8 (not used)
9* indentation:4
10*
11* created on: 2008Feb13
12* created by: Andy Heninger
13*
14* Unicode Spoof Detection
15*/
16
17#ifndef USPOOF_H
18#define USPOOF_H
19
20#include "unicode/utypes.h"
21#include "unicode/uset.h"
22#include "unicode/parseerr.h"
24
25#if !UCONFIG_NO_NORMALIZATION
26
27
28#if U_SHOW_CPLUSPLUS_API
29#include "unicode/unistr.h"
30#include "unicode/uniset.h"
31#endif
32
33
152struct USpoofChecker;
162typedef enum USpoofChecks {
169
179
190
198
213
214#ifndef U_HIDE_DEPRECATED_API
221#endif /* U_HIDE_DEPRECATED_API */
222
230
236
244
251
264 USPOOF_AUX_INFO = 0x40000000
265
267
268
274 typedef enum URestrictionLevel {
280 USPOOF_ASCII = 0x10000000,
320
331U_STABLE USpoofChecker * U_EXPORT2
333
334
356U_STABLE USpoofChecker * U_EXPORT2
357uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
358 UErrorCode *pErrorCode);
359
391U_STABLE USpoofChecker * U_EXPORT2
392uspoof_openFromSource(const char *confusables, int32_t confusablesLen,
393 const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
394 int32_t *errType, UParseError *pe, UErrorCode *status);
395
396
402U_STABLE void U_EXPORT2
404
405#if U_SHOW_CPLUSPLUS_API
406
408
419
421
422#endif
423
433U_STABLE USpoofChecker * U_EXPORT2
435
436
449U_STABLE void U_EXPORT2
450uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
451
463U_STABLE int32_t U_EXPORT2
465
474U_STABLE void U_EXPORT2
476
477
487
530U_STABLE void U_EXPORT2
531uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status);
532
554U_STABLE const char * U_EXPORT2
556
557
576U_STABLE void U_EXPORT2
578
579
600U_STABLE const USet * U_EXPORT2
602
603
604#if U_SHOW_CPLUSPLUS_API
623U_STABLE void U_EXPORT2
624uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status);
625
626
647U_STABLE const icu::UnicodeSet * U_EXPORT2
649#endif
650
651
678U_STABLE int32_t U_EXPORT2
680 const UChar *id, int32_t length,
681 int32_t *position,
682 UErrorCode *status);
683
684
712U_STABLE int32_t U_EXPORT2
714 const char *id, int32_t length,
715 int32_t *position,
716 UErrorCode *status);
717
718
719#if U_SHOW_CPLUSPLUS_API
743U_STABLE int32_t U_EXPORT2
745 const icu::UnicodeString &id,
746 int32_t *position,
747 UErrorCode *status);
748
749#endif
750
751
791U_STABLE int32_t U_EXPORT2
793 const UChar *id1, int32_t length1,
794 const UChar *id2, int32_t length2,
795 UErrorCode *status);
796
797
798
824U_STABLE int32_t U_EXPORT2
826 const char *id1, int32_t length1,
827 const char *id2, int32_t length2,
828 UErrorCode *status);
829
830
831
832
833#if U_SHOW_CPLUSPLUS_API
855U_STABLE int32_t U_EXPORT2
857 const icu::UnicodeString &s1,
858 const icu::UnicodeString &s2,
859 UErrorCode *status);
860#endif
861
862
895U_STABLE int32_t U_EXPORT2
897 uint32_t type,
898 const UChar *id, int32_t length,
899 UChar *dest, int32_t destCapacity,
900 UErrorCode *status);
901
937U_STABLE int32_t U_EXPORT2
939 uint32_t type,
940 const char *id, int32_t length,
941 char *dest, int32_t destCapacity,
942 UErrorCode *status);
943
944#if U_SHOW_CPLUSPLUS_API
970U_I18N_API icu::UnicodeString & U_EXPORT2
972 uint32_t type,
973 const icu::UnicodeString &id,
974 icu::UnicodeString &dest,
975 UErrorCode *status);
976#endif /* U_SHOW_CPLUSPLUS_API */
977
989U_STABLE const USet * U_EXPORT2
991
1003U_STABLE const USet * U_EXPORT2
1005
1006#if U_SHOW_CPLUSPLUS_API
1007
1019U_STABLE const icu::UnicodeSet * U_EXPORT2
1021
1033U_STABLE const icu::UnicodeSet * U_EXPORT2
1035
1036#endif /* U_SHOW_CPLUSPLUS_API */
1037
1060U_STABLE int32_t U_EXPORT2
1062 void *data, int32_t capacity,
1063 UErrorCode *status);
1064
1065
1066#endif
1067
1068#endif /* USPOOF_H */
"Smart pointer" class, closes a USpoofChecker via uspoof_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:539
C API: Parse Error Information.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
struct USet USet
Definition: ucnv.h:67
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:312
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:109
C++ API: Unicode Set.
C++ API: Unicode String.
C API: Unicode Set.
int32_t uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id, int32_t length, char *dest, int32_t destCapacity, UErrorCode *status)
Get the "skeleton" for an identifier.
USpoofChecks
Enum for the kinds of checks that USpoofChecker can perform.
Definition: uspoof.h:162
@ USPOOF_MIXED_NUMBERS
Check that an identifier does not include decimal digits from more than one numbering system.
Definition: uspoof.h:243
@ USPOOF_RESTRICTION_LEVEL
Check that an identifier is no looser than the specified RestrictionLevel.
Definition: uspoof.h:212
@ USPOOF_CHAR_LIMIT
Check that an identifier contains only characters from a specified set of acceptable characters.
Definition: uspoof.h:235
@ USPOOF_ALL_CHECKS
Enable all spoof checks.
Definition: uspoof.h:250
@ USPOOF_SINGLE_SCRIPT
Check that an identifier contains only characters from a single script (plus chars from the common an...
Definition: uspoof.h:220
@ USPOOF_INVISIBLE
Check an identifier for the presence of invisible characters, such as zero-width spaces,...
Definition: uspoof.h:229
@ USPOOF_MIXED_SCRIPT_CONFUSABLE
Mixed script confusable test.
Definition: uspoof.h:178
@ USPOOF_AUX_INFO
Enable the return of auxiliary (non-error) information in the upper bits of the check results value.
Definition: uspoof.h:264
@ USPOOF_WHOLE_SCRIPT_CONFUSABLE
Whole script confusable test.
Definition: uspoof.h:189
@ USPOOF_SINGLE_SCRIPT_CONFUSABLE
Single script confusable test.
Definition: uspoof.h:168
@ USPOOF_ANY_CASE
Any Case Modifier for confusable identifier tests.
Definition: uspoof.h:197
int32_t uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status)
Get the set of checks that this Spoof Checker has been configured to perform.
void uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status)
Specify the set of checks that will be performed by the check functions of this Spoof Checker.
const USet * uspoof_getInclusionSet(UErrorCode *status)
Get the set of Candidate Characters for Inclusion in Identifiers, as defined in Unicode UAX #31,...
void uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel)
Set the loosest restriction level allowed.
U_I18N_API icu::UnicodeString & uspoof_getSkeletonUnicodeString(const USpoofChecker *sc, uint32_t type, const icu::UnicodeString &id, icu::UnicodeString &dest, UErrorCode *status)
Get the "skeleton" for an identifier.
URestrictionLevel uspoof_getRestrictionLevel(const USpoofChecker *sc)
Get the Restriction Level that will be tested if the checks include RESTRICTION_LEVEL.
URestrictionLevel
Constants from UAX #39 for use in setRestrictionLevel(), and for returned identifier restriction leve...
Definition: uspoof.h:274
@ USPOOF_SINGLE_SCRIPT_RESTRICTIVE
All characters in each identifier must be from a single script.
Definition: uspoof.h:286
@ USPOOF_MODERATELY_RESTRICTIVE
Allow Latin with other scripts except Cyrillic, Greek, Cherokee. Otherwise, the same as Highly Restric...
Definition: uspoof.h:300
@ USPOOF_ASCII
Only ASCII characters: U+0000..U+007F.
Definition: uspoof.h:280
@ USPOOF_RESTRICTION_LEVEL_MASK
Mask for selecting the Restriction Level bits from the return value of uspoof_check().
Definition: uspoof.h:318
@ USPOOF_MINIMALLY_RESTRICTIVE
Allow arbitrary mixtures of scripts.
Definition: uspoof.h:306
@ USPOOF_HIGHLY_RESTRICTIVE
All characters in each identifier must be from a single script, or from the combinations: Latin + Han...
Definition: uspoof.h:294
@ USPOOF_UNRESTRICTIVE
Any valid identifiers, including characters outside of the Identifier Profile.
Definition: uspoof.h:312
void uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status)
Limit the acceptable characters to those specified by a Unicode Set.
USpoofChecker * uspoof_clone(const USpoofChecker *sc, UErrorCode *status)
Clone a Spoof Checker.
int32_t uspoof_areConfusableUnicodeString(const USpoofChecker *sc, const icu::UnicodeString &s1, const icu::UnicodeString &s2, UErrorCode *status)
Check whether two specified strings are visually confusable.
USpoofChecker * uspoof_openFromSource(const char *confusables, int32_t confusablesLen, const char *confusablesWholeScript, int32_t confusablesWholeScriptLen, int32_t *errType, UParseError *pe, UErrorCode *status)
Open a Spoof Checker from the source form of the spoof data.
int32_t uspoof_serialize(USpoofChecker *sc, void *data, int32_t capacity, UErrorCode *status)
Serialize the data for a spoof detector into a chunk of memory.
const icu::UnicodeSet * uspoof_getRecommendedUnicodeSet(UErrorCode *status)
Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined in Unicod...
const USet * uspoof_getRecommendedSet(UErrorCode *status)
Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined in Unicod...
const USet * uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status)
Get a USet for the characters permitted in an identifier.
int32_t uspoof_check(const USpoofChecker *sc, const UChar *id, int32_t length, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
USpoofChecker * uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength, UErrorCode *pErrorCode)
Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory.
void uspoof_close(USpoofChecker *sc)
Close a Spoof Checker, freeing any memory that was being held by its implementation.
int32_t uspoof_checkUnicodeString(const USpoofChecker *sc, const icu::UnicodeString &id, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
void uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status)
Limit the acceptable characters to those specified by a Unicode Set.
int32_t uspoof_checkUTF8(const USpoofChecker *sc, const char *id, int32_t length, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
int32_t uspoof_getSkeleton(const USpoofChecker *sc, uint32_t type, const UChar *id, int32_t length, UChar *dest, int32_t destCapacity, UErrorCode *status)
Get the "skeleton" for an identifier.
int32_t uspoof_areConfusable(const USpoofChecker *sc, const UChar *id1, int32_t length1, const UChar *id2, int32_t length2, UErrorCode *status)
Check whether two specified strings are visually confusable.
void uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status)
Limit characters that are acceptable in identifiers being checked to those normally used with the lan...
USpoofChecker * uspoof_open(UErrorCode *status)
Create a Unicode Spoof Checker, configured to perform all checks except for USPOOF_LOCALE_LIMIT and U...
const icu::UnicodeSet * uspoof_getInclusionUnicodeSet(UErrorCode *status)
Get the set of Candidate Characters for Inclusion in Identifiers, as defined in Unicode UAX #31,...
struct USpoofChecker USpoofChecker
typedef for C of USpoofChecker
Definition: uspoof.h:153
const icu::UnicodeSet * uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status)
Get a UnicodeSet for the characters permitted in an identifier.
const char * uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status)
Get a list of locales for the scripts that are acceptable in strings to be checked.
int32_t uspoof_areConfusableUTF8(const USpoofChecker *sc, const char *id1, int32_t length1, const char *id2, int32_t length2, UErrorCode *status)
Check whether two specified strings are visually confusable.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:476
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:358
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129