ICU 57.1
normalizer2.h
Go to the documentation of this file.
1/*
2*******************************************************************************
3*
4* Copyright (C) 2009-2013, International Business Machines
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: normalizer2.h
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2009nov22
14* created by: Markus W. Scherer
15*/
16
17#ifndef __NORMALIZER2_H__
18#define __NORMALIZER2_H__
19
25#include "unicode/utypes.h"
26
27#if !UCONFIG_NO_NORMALIZATION
28
29#include "unicode/uniset.h"
30#include "unicode/unistr.h"
31#include "unicode/unorm2.h"
32
34
79public:
85
97 static const Normalizer2 *
99
111 static const Normalizer2 *
113
125 static const Normalizer2 *
127
139 static const Normalizer2 *
141
153 static const Normalizer2 *
155
177 static const Normalizer2 *
179 const char *name,
181 UErrorCode &errorCode);
182
194 normalize(const UnicodeString &src, UErrorCode &errorCode) const {
196 normalize(src, result, errorCode);
197 return result;
198 }
212 virtual UnicodeString &
215 UErrorCode &errorCode) const = 0;
230 virtual UnicodeString &
232 const UnicodeString &second,
233 UErrorCode &errorCode) const = 0;
248 virtual UnicodeString &
250 const UnicodeString &second,
251 UErrorCode &errorCode) const = 0;
252
266 virtual UBool
268
293 virtual UBool
295
311 virtual UChar32
313
322 virtual uint8_t
324
339 virtual UBool
340 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
341
358 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
359
382 virtual int32_t
383 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
384
398 virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
399
414 virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
415
429 virtual UBool isInert(UChar32 c) const = 0;
430};
431
444public:
456 norm2(n2), set(filterSet) {}
457
463
477 virtual UnicodeString &
480 UErrorCode &errorCode) const;
495 virtual UnicodeString &
497 const UnicodeString &second,
498 UErrorCode &errorCode) const;
513 virtual UnicodeString &
515 const UnicodeString &second,
516 UErrorCode &errorCode) const;
517
529 virtual UBool
531
543 virtual UBool
545
556 virtual UChar32
558
567 virtual uint8_t
569
581 virtual UBool
582 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
595 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
607 virtual int32_t
608 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
609
619
629
637 virtual UBool isInert(UChar32 c) const;
638private:
640 normalize(const UnicodeString &src,
643 UErrorCode &errorCode) const;
644
646 normalizeSecondAndAppend(UnicodeString &first,
647 const UnicodeString &second,
649 UErrorCode &errorCode) const;
650
651 const Normalizer2 &norm2;
652 const UnicodeSet &set;
653};
654
656
657#endif // !UCONFIG_NO_NORMALIZATION
658#endif // __NORMALIZER2_H__
Normalization filtered by a UnicodeSet.
virtual int32_t spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const
Returns the end of the normalized substring of the input string.
virtual UBool getRawDecomposition(UChar32 c, UnicodeString &decomposition) const
Gets the raw decomposition mapping of c.
virtual UBool isNormalized(const UnicodeString &s, UErrorCode &errorCode) const
Tests if the string is normalized.
virtual UBool getDecomposition(UChar32 c, UnicodeString &decomposition) const
Gets the decomposition mapping of c.
virtual UnicodeString & normalize(const UnicodeString &src, UnicodeString &dest, UErrorCode &errorCode) const
Writes the normalized form of the source string to the destination string (replacing its contents) and returns the destination string.
virtual UnicodeString & append(UnicodeString &first, const UnicodeString &second, UErrorCode &errorCode) const
Appends the second string to the first string (merging them at the boundary) and returns the first string.
virtual UChar32 composePair(UChar32 a, UChar32 b) const
Performs pairwise composition of a & b and returns the composite if there is one.
virtual UBool hasBoundaryAfter(UChar32 c) const
Tests if the character always has a normalization boundary after it, regardless of context.
FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet)
Constructs a filtered normalizer wrapping any Normalizer2 instance and a filter set.
virtual uint8_t getCombiningClass(UChar32 c) const
Gets the combining class of c.
virtual UnicodeString & normalizeSecondAndAppend(UnicodeString &first, const UnicodeString &second, UErrorCode &errorCode) const
Appends the normalized form of the second string to the first string (merging them at the boundary) and returns the first string.
virtual UNormalizationCheckResult quickCheck(const UnicodeString &s, UErrorCode &errorCode) const
Tests if the string is normalized.
virtual UBool hasBoundaryBefore(UChar32 c) const
Tests if the character always has a normalization boundary before it, regardless of context.
virtual UBool isInert(UChar32 c) const
Tests if the character is normalization-inert.
~FilteredNormalizer2()
Destructor.
"Smart pointer" base class; do not use directly: use LocalPointer etc.
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition normalizer2.h:78
static const Normalizer2 * getNFKCCasefoldInstance(UErrorCode &errorCode)
Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
static const Normalizer2 * getInstance(const char *packageName, const char *name, UNormalization2Mode mode, UErrorCode &errorCode)
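Returns a Normalizer2 instance which uses the specified data file (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) and which composes or decomposes text according to the specified mode.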
UnicodeString normalize(const UnicodeString &src, UErrorCode &errorCode) const
Returns the normalized form of the source string.
virtual UBool isNormalized(const UnicodeString &s, UErrorCode &errorCode) const =0
Tests if the string is normalized.
virtual int32_t spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const =0
Returns the end of the normalized substring of the input string.
virtual UNormalizationCheckResult quickCheck(const UnicodeString &s, UErrorCode &errorCode) const =0
Tests if the string is normalized.
static const Normalizer2 * getNFCInstance(UErrorCode &errorCode)
Returns a Normalizer2 instance for Unicode NFC normalization.
virtual UnicodeString & append(UnicodeString &first, const UnicodeString &second, UErrorCode &errorCode) const =0
Appends the second string to the first string (merging them at the boundary) and returns the first string.
virtual UnicodeString & normalize(const UnicodeString &src, UnicodeString &dest, UErrorCode &errorCode) const =0
Writes the normalized form of the source string to the destination string (replacing its contents) and returns the destination string.
virtual UnicodeString & normalizeSecondAndAppend(UnicodeString &first, const UnicodeString &second, UErrorCode &errorCode) const =0
Appends the normalized form of the second string to the first string (merging them at the boundary) and returns the first string.
~Normalizer2()
Destructor.
static const Normalizer2 * getNFKCInstance(UErrorCode &errorCode)
Returns a Normalizer2 instance for Unicode NFKC normalization.
virtual UBool hasBoundaryBefore(UChar32 c) const =0
Tests if the character always has a normalization boundary before it, regardless of context.
virtual uint8_t getCombiningClass(UChar32 c) const
Gets the combining class of c.
virtual UBool hasBoundaryAfter(UChar32 c) const =0
Tests if the character always has a normalization boundary after it, regardless of context.
static const Normalizer2 * getNFKDInstance(UErrorCode &errorCode)
Returns a Normalizer2 instance for Unicode NFKD normalization.
virtual UBool isInert(UChar32 c) const =0
Tests if the character is normalization-inert.
virtual UBool getRawDecomposition(UChar32 c, UnicodeString &decomposition) const
Gets the raw decomposition mapping of c.
virtual UChar32 composePair(UChar32 a, UChar32 b) const
Performs pairwise composition of a & b and returns the composite if there is one.
static const Normalizer2 * getNFDInstance(UErrorCode &errorCode)
Returns a Normalizer2 instance for Unicode NFD normalization.
virtual UBool getDecomposition(UChar32 c, UnicodeString &decomposition) const =0
Gets the decomposition mapping of c.
UObject is the common ICU "boilerplate" class.
Definition uobject.h:221
A mutable set of Unicode characters and multicharacter strings.
Definition uniset.h:276
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition unistr.h:294
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:332
C++ API: Unicode Set.
C++ API: Unicode String.
C API: New API for Unicode Normalization.
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition unorm2.h:91
UNormalization2Mode
Constants for normalization modes.
Definition unorm2.h:42
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is contained vs. not contained in the set.
Definition uset.h:150
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers, and to use the same mechanism for C and C++.
Definition utypes.h:476
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition utypes.h:357
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition uversion.h:129