00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef __NORMALIZER2_H__
00020 #define __NORMALIZER2_H__
00021
00027 #include "unicode/utypes.h"
00028
00029 #if U_SHOW_CPLUSPLUS_API
00030
00031 #if !UCONFIG_NO_NORMALIZATION
00032
00033 #include "unicode/stringpiece.h"
00034 #include "unicode/uniset.h"
00035 #include "unicode/unistr.h"
00036 #include "unicode/unorm2.h"
00037
00038 U_NAMESPACE_BEGIN
00039
// Forward declaration only: within this header ByteSink appears solely as a
// reference parameter (ByteSink &sink) of the normalizeUTF8() declarations,
// so the complete type is not required here.
00040 class ByteSink;
00041
/**
 * Abstract interface for a normalizer, derived from UObject and exported
 * through U_COMMON_API.
 *
 * The class declares pure virtual members, so it cannot be instantiated
 * directly; this header exposes static getters that hand out
 * `const Normalizer2 *` instances.
 *
 * NOTE(review): ownership/lifetime of the pointers returned by the static
 * getters is not visible in this header -- confirm against the ICU
 * documentation before deleting or caching them.
 */
00085 class U_COMMON_API Normalizer2 : public UObject {
00086 public:
    /** Destructor; only declared here, defined outside this header. */
00091 ~Normalizer2();
00092
    /**
     * Static getter; judging by the name, presumably returns the normalizer
     * for Unicode form NFC -- confirm against the ICU documentation.
     * @param errorCode in/out ICU error code (passed by non-const reference).
     * @return pointer to a const Normalizer2.
     */
00104 static const Normalizer2 *
00105 getNFCInstance(UErrorCode &errorCode);
00106
    /**
     * Static getter; presumably the NFD counterpart of getNFCInstance()
     * (inferred from the name -- confirm).
     * @param errorCode in/out ICU error code.
     * @return pointer to a const Normalizer2.
     */
00118 static const Normalizer2 *
00119 getNFDInstance(UErrorCode &errorCode);
00120
    /**
     * Static getter; presumably the NFKC counterpart of getNFCInstance()
     * (inferred from the name -- confirm).
     * @param errorCode in/out ICU error code.
     * @return pointer to a const Normalizer2.
     */
00132 static const Normalizer2 *
00133 getNFKCInstance(UErrorCode &errorCode);
00134
    /**
     * Static getter; presumably the NFKD counterpart of getNFCInstance()
     * (inferred from the name -- confirm).
     * @param errorCode in/out ICU error code.
     * @return pointer to a const Normalizer2.
     */
00146 static const Normalizer2 *
00147 getNFKDInstance(UErrorCode &errorCode);
00148
    /**
     * Static getter; presumably returns the NFKC_Casefold normalizer
     * (inferred from the name -- confirm).
     * @param errorCode in/out ICU error code.
     * @return pointer to a const Normalizer2.
     */
00160 static const Normalizer2 *
00161 getNFKCCasefoldInstance(UErrorCode &errorCode);
00162
    /**
     * Static getter selecting an instance by package name, data name and mode.
     * @param packageName C string naming the data package
     *                    (NOTE(review): whether NULL is accepted is not
     *                    visible here -- confirm).
     * @param name        C string naming the normalization data.
     * @param mode        UNormalization2Mode value selecting the mode.
     * @param errorCode   in/out ICU error code.
     * @return pointer to a const Normalizer2.
     */
00184 static const Normalizer2 *
00185 getInstance(const char *packageName,
00186 const char *name,
00187 UNormalization2Mode mode,
00188 UErrorCode &errorCode);
00189
    /**
     * Inline convenience overload: creates a local UnicodeString, passes it
     * as `dest` to the virtual three-argument normalize(), and returns that
     * local by value.
     *
     * errorCode is not inspected here; the local string is returned in
     * whatever state the three-argument overload left it.
     * @param src       source string.
     * @param errorCode in/out ICU error code, forwarded unchanged.
     * @return the string filled in by normalize(src, dest, errorCode).
     */
00200 UnicodeString
00201 normalize(const UnicodeString &src, UErrorCode &errorCode) const {
00202 UnicodeString result;
00203 normalize(src, result, errorCode);
00204 return result;
00205 }
    /**
     * Pure virtual: normalizes `src` into `dest`.
     * @param src       source string.
     * @param dest      destination string, passed by non-const reference.
     * @param errorCode in/out ICU error code.
     * @return a UnicodeString reference (presumably `dest` -- confirm).
     */
00219 virtual UnicodeString &
00220 normalize(const UnicodeString &src,
00221 UnicodeString &dest,
00222 UErrorCode &errorCode) const = 0;
00223
    /**
     * Virtual, but not pure: a base implementation must therefore exist
     * outside this header, and subclasses may override it.
     * @param options   32-bit option bits (meaning not defined in this header).
     * @param src       input as a StringPiece (presumably UTF-8, per the
     *                  function name -- confirm).
     * @param sink      ByteSink that receives the output.
     * @param edits     pointer to an Edits object
     *                  (NOTE(review): presumably optional/nullable because it
     *                  is a pointer -- confirm).
     * @param errorCode in/out ICU error code.
     */
00246 virtual void
00247 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
00248 Edits *edits, UErrorCode &errorCode) const;
00249
    /**
     * Pure virtual. Judging by the name, normalizes `second` and appends it
     * to `first` -- exact boundary handling is not visible here.
     * @param first     string modified in place (non-const reference).
     * @param second    string to be added (const reference).
     * @param errorCode in/out ICU error code.
     * @return a UnicodeString reference (presumably `first` -- confirm).
     */
00264 virtual UnicodeString &
00265 normalizeSecondAndAppend(UnicodeString &first,
00266 const UnicodeString &second,
00267 UErrorCode &errorCode) const = 0;
    /**
     * Pure virtual. Same parameter shape as normalizeSecondAndAppend();
     * how the two differ is not visible in this header -- confirm against
     * the ICU documentation.
     * @param first     string modified in place (non-const reference).
     * @param second    string to be appended (const reference).
     * @param errorCode in/out ICU error code.
     * @return a UnicodeString reference (presumably `first` -- confirm).
     */
00282 virtual UnicodeString &
00283 append(UnicodeString &first,
00284 const UnicodeString &second,
00285 UErrorCode &errorCode) const = 0;
00286
    /**
     * Pure virtual decomposition lookup for a single code point.
     * @param c             code point to look up.
     * @param decomposition output string (non-const reference).
     * @return UBool (presumably true when `c` has a decomposition -- confirm).
     */
00300 virtual UBool
00301 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
00302
    /**
     * Virtual, not pure (base implementation lives outside this header).
     * Same signature shape as getDecomposition(); presumably yields the raw,
     * non-recursive mapping -- confirm.
     * @param c             code point to look up.
     * @param decomposition output string (non-const reference).
     * @return UBool result of the lookup.
     */
00327 virtual UBool
00328 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
00329
    /**
     * Virtual, not pure (base implementation lives outside this header).
     * @param a first code point of the pair.
     * @param b second code point of the pair.
     * @return a UChar32 (presumably the composite, with some sentinel when
     *         the pair does not compose -- confirm).
     */
00345 virtual UChar32
00346 composePair(UChar32 a, UChar32 b) const;
00347
    /**
     * Virtual, not pure (base implementation lives outside this header).
     * @param c code point.
     * @return 8-bit combining class value for `c`.
     */
00356 virtual uint8_t
00357 getCombiningClass(UChar32 c) const;
00358
    /**
     * Pure virtual test of whether `s` is normalized.
     * @param s         string to test.
     * @param errorCode in/out ICU error code.
     * @return UBool result of the test.
     */
00373 virtual UBool
00374 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
    /**
     * Virtual, not pure (base implementation lives outside this header).
     * StringPiece counterpart of isNormalized() (presumably UTF-8 input, per
     * the name -- confirm).
     * @param s         input to test.
     * @param errorCode in/out ICU error code.
     * @return UBool result of the test.
     */
00394 virtual UBool
00395 isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
00396
00397
    /**
     * Pure virtual quick check of `s`.
     * @param s         string to test.
     * @param errorCode in/out ICU error code.
     * @return a UNormalizationCheckResult value.
     */
00413 virtual UNormalizationCheckResult
00414 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00415
    /**
     * Pure virtual.
     * @param s         string to scan.
     * @param errorCode in/out ICU error code.
     * @return an int32_t (presumably the end index of the leading span that
     *         passes the quick check -- confirm).
     */
00438 virtual int32_t
00439 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00440
    /** Pure virtual per-code-point predicate; takes `c`, returns UBool. */
00454 virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
00455
    /** Pure virtual per-code-point predicate; takes `c`, returns UBool. */
00470 virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
00471
    /** Pure virtual per-code-point predicate; takes `c`, returns UBool. */
00485 virtual UBool isInert(UChar32 c) const = 0;
00486 };
00487
/**
 * Normalizer2 subclass that pairs another Normalizer2 with a UnicodeSet.
 *
 * Both constructor arguments are stored as references (members `norm2` and
 * `set`), so the referenced objects must outlive this instance.
 *
 * NOTE(review): presumably the wrapped normalizer is applied only to
 * characters in the set -- the filtering logic itself is implemented outside
 * this header; confirm there.
 *
 * NOTE(review): the normalize() overloads declared in this class hide the
 * inherited two-argument Normalizer2::normalize(src, errorCode) convenience
 * overload for calls made through a FilteredNormalizer2-typed expression (no
 * using-declaration is present). That overload remains reachable through a
 * Normalizer2 reference or pointer.
 */
00499 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
00500 public:
    /**
     * Binds this object to `n2` and `filterSet` by reference; nothing is
     * copied, so both arguments must stay alive while this object is used.
     * @param n2        wrapped normalizer.
     * @param filterSet set used as the filter.
     */
00511 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
00512 norm2(n2), set(filterSet) {}
00513
    /** Destructor; only declared here, defined outside this header. */
00518 ~FilteredNormalizer2();
00519
    /**
     * Override of Normalizer2::normalize(src, dest, errorCode).
     * @param src       source string.
     * @param dest      destination string (non-const reference).
     * @param errorCode in/out ICU error code.
     * @return a UnicodeString reference (presumably `dest` -- confirm).
     */
00533 virtual UnicodeString &
00534 normalize(const UnicodeString &src,
00535 UnicodeString &dest,
00536 UErrorCode &errorCode) const U_OVERRIDE;
00537
    /**
     * Override of Normalizer2::normalizeUTF8().
     * @param options   32-bit option bits.
     * @param src       input as a StringPiece.
     * @param sink      ByteSink that receives the output.
     * @param edits     pointer to an Edits object (presumably nullable --
     *                  confirm).
     * @param errorCode in/out ICU error code.
     */
00560 virtual void
00561 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
00562 Edits *edits, UErrorCode &errorCode) const U_OVERRIDE;
00563
    /**
     * Override of Normalizer2::normalizeSecondAndAppend().
     * @param first     string modified in place.
     * @param second    string to be added.
     * @param errorCode in/out ICU error code.
     * @return a UnicodeString reference (presumably `first` -- confirm).
     */
00578 virtual UnicodeString &
00579 normalizeSecondAndAppend(UnicodeString &first,
00580 const UnicodeString &second,
00581 UErrorCode &errorCode) const U_OVERRIDE;
    /**
     * Override of Normalizer2::append().
     * @param first     string modified in place.
     * @param second    string to be appended.
     * @param errorCode in/out ICU error code.
     * @return a UnicodeString reference (presumably `first` -- confirm).
     */
00596 virtual UnicodeString &
00597 append(UnicodeString &first,
00598 const UnicodeString &second,
00599 UErrorCode &errorCode) const U_OVERRIDE;
00600
    /**
     * Override of Normalizer2::getDecomposition().
     * @param c             code point to look up.
     * @param decomposition output string.
     * @return UBool result of the lookup.
     */
00612 virtual UBool
00613 getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
00614
    /**
     * Override of Normalizer2::getRawDecomposition().
     * @param c             code point to look up.
     * @param decomposition output string.
     * @return UBool result of the lookup.
     */
00626 virtual UBool
00627 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
00628
    /**
     * Override of Normalizer2::composePair().
     * @param a first code point of the pair.
     * @param b second code point of the pair.
     * @return a UChar32 result.
     */
00639 virtual UChar32
00640 composePair(UChar32 a, UChar32 b) const U_OVERRIDE;
00641
    /**
     * Override of Normalizer2::getCombiningClass().
     * @param c code point.
     * @return 8-bit combining class value.
     */
00650 virtual uint8_t
00651 getCombiningClass(UChar32 c) const U_OVERRIDE;
00652
    /**
     * Override of Normalizer2::isNormalized().
     * @param s         string to test.
     * @param errorCode in/out ICU error code.
     * @return UBool result of the test.
     */
00664 virtual UBool
00665 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
    /**
     * Override of Normalizer2::isNormalizedUTF8().
     * @param s         input to test.
     * @param errorCode in/out ICU error code.
     * @return UBool result of the test.
     */
00685 virtual UBool
00686 isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE;
    /**
     * Override of Normalizer2::quickCheck().
     * @param s         string to test.
     * @param errorCode in/out ICU error code.
     * @return a UNormalizationCheckResult value.
     */
00698 virtual UNormalizationCheckResult
00699 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
    /**
     * Override of Normalizer2::spanQuickCheckYes().
     * @param s         string to scan.
     * @param errorCode in/out ICU error code.
     * @return an int32_t result.
     */
00711 virtual int32_t
00712 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
00713
    /** Override of Normalizer2::hasBoundaryBefore(); takes `c`, returns UBool. */
00722 virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE;
00723
    /** Override of Normalizer2::hasBoundaryAfter(); takes `c`, returns UBool. */
00732 virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE;
00733
    /** Override of Normalizer2::isInert(); takes `c`, returns UBool. */
00741 virtual UBool isInert(UChar32 c) const U_OVERRIDE;
00742 private:
    // Private, non-virtual overload of normalize() that additionally takes a
    // USetSpanCondition (presumably the span condition to start with when
    // scanning `src` against `set` -- confirm in the implementation).
00743 UnicodeString &
00744 normalize(const UnicodeString &src,
00745 UnicodeString &dest,
00746 USetSpanCondition spanCondition,
00747 UErrorCode &errorCode) const;
00748
    // Private, non-virtual overload of normalizeUTF8() that takes the input
    // as a (pointer, length) pair plus a USetSpanCondition.
00749 void
00750 normalizeUTF8(uint32_t options, const char *src, int32_t length,
00751 ByteSink &sink, Edits *edits,
00752 USetSpanCondition spanCondition,
00753 UErrorCode &errorCode) const;
00754
    // Private, non-virtual overload with an extra `doNormalize` flag
    // (presumably the shared implementation behind the public
    // normalizeSecondAndAppend() and append() -- confirm).
00755 UnicodeString &
00756 normalizeSecondAndAppend(UnicodeString &first,
00757 const UnicodeString &second,
00758 UBool doNormalize,
00759 UErrorCode &errorCode) const;
00760
    // Wrapped normalizer; held by reference, not owned by this object.
00761 const Normalizer2 &norm2;
    // Filter set; held by reference, not owned by this object.
00762 const UnicodeSet &set;
00763 };
00764
00765 U_NAMESPACE_END
00766
00767 #endif // !UCONFIG_NO_NORMALIZATION
00768
00769 #endif
00770
00771 #endif // __NORMALIZER2_H__