ICU 57.1 57.1
tblcoll.h
Go to the documentation of this file.
1/*
2******************************************************************************
3* Copyright (C) 1996-2016, International Business Machines Corporation and
4* others. All Rights Reserved.
5******************************************************************************
6*/
7
60#ifndef TBLCOLL_H
61#define TBLCOLL_H
62
63#include "unicode/utypes.h"
64
65#if !UCONFIG_NO_COLLATION
66
67#include "unicode/coll.h"
68#include "unicode/locid.h"
69#include "unicode/uiter.h"
70#include "unicode/ucol.h"
71
73
75struct CollationData;
81class StringSearch;
86class CollationKey;
87class SortKeyByteSink;
88class UnicodeSet;
89class UnicodeString;
90class UVector64;
91
112public:
122
135
148
163
164#ifndef U_HIDE_INTERNAL_API
171 UErrorCode &errorCode);
172#endif /* U_HIDE_INTERNAL_API */
173
180
181
199 RuleBasedCollator(const uint8_t *bin, int32_t length,
200 const RuleBasedCollator *base,
202
208
215
222 virtual UBool operator==(const Collator& other) const;
223
229 virtual Collator* clone(void) const;
230
242 const UnicodeString& source) const;
243
254 const CharacterIterator& source) const;
255
256 // Make deprecated versions of Collator::compare() visible.
257 using Collator::compare;
258
272 const UnicodeString& target,
273 UErrorCode &status) const;
274
289 const UnicodeString& target,
290 int32_t length,
291 UErrorCode &status) const;
292
309 virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
310 const UChar* target, int32_t targetLength,
311 UErrorCode &status) const;
312
326 UErrorCode &status) const;
327
342 const StringPiece &target,
343 UErrorCode &status) const;
344
361 UErrorCode& status) const;
362
378 virtual CollationKey& getCollationKey(const UChar *source,
379 int32_t sourceLength,
381 UErrorCode& status) const;
382
388 virtual int32_t hashCode() const;
389
401
407 const UnicodeString& getRules() const;
408
414 virtual void getVersion(UVersionInfo info) const;
415
416#ifndef U_HIDE_DEPRECATED_API
433 int32_t getMaxExpansion(int32_t order) const;
434#endif /* U_HIDE_DEPRECATED_API */
435
446 virtual UClassID getDynamicClassID(void) const;
447
460
461#ifndef U_HIDE_DEPRECATED_API
472 uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const;
473#endif /* U_HIDE_DEPRECATED_API */
474
485 int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const;
486
499
509
518 UErrorCode &status) const;
519
537
545
562 virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
563
580
592 virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
593
601 virtual uint32_t getVariableTop(UErrorCode &status) const;
602
613
628 virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
629 int32_t resultLength) const;
630
647 virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
648 uint8_t *result, int32_t resultLength) const;
649
663 virtual int32_t getReorderCodes(int32_t *dest,
664 int32_t destCapacity,
665 UErrorCode& status) const;
666
678 virtual void setReorderCodes(const int32_t* reorderCodes,
679 int32_t reorderCodesLength,
681
687 const char *left, int32_t leftLength,
688 const char *right, int32_t rightLength,
689 UErrorCode &errorCode) const;
690
714 virtual int32_t internalGetShortDefinitionString(const char *locale,
715 char *buffer,
716 int32_t capacity,
717 UErrorCode &status) const;
718
723 virtual int32_t internalNextSortKeyPart(
724 UCharIterator *iter, uint32_t state[2],
725 uint8_t *dest, int32_t count, UErrorCode &errorCode) const;
726
727 // Do not enclose the default constructor with #ifndef U_HIDE_INTERNAL_API
733
734#ifndef U_HIDE_INTERNAL_API
741 const char *internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const;
742
757 UBool addPrefixes, UErrorCode &errorCode) const;
758
764 void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const;
765
771 const UnicodeString &rules,
772 int32_t strength,
775 UErrorCode &errorCode);
776
779 return dynamic_cast<RuleBasedCollator *>(fromUCollator(uc));
780 }
// Const overload: converts a const C-API UCollator pointer to a
// const RuleBasedCollator pointer by way of fromUCollator().
// Because the conversion is a dynamic_cast on a pointer, the result is a
// null pointer when the underlying object is not a RuleBasedCollator.
782 static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) {
783 return dynamic_cast<const RuleBasedCollator *>(fromUCollator(uc));
784 }
785
790 void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const;
791#endif // U_HIDE_INTERNAL_API
792
793protected:
801 virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
802
803private:
804 friend class CollationElementIterator;
805 friend class Collator;
806
808
// Private attribute indexes that continue the numbering of the public
// UColAttribute constants.
// NOTE(review): presumably these are bit positions in explicitlySetAttributes
// (the commented-out assert in attributeHasBeenSetExplicitly bounds the
// attribute by ATTR_LIMIT) -- confirm against the implementation.
814 enum Attributes {
// Starts at UCOL_ATTRIBUTE_COUNT, i.e. the first value after the UColAttribute constants.
815 ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT,
// One past the last index defined in this enum.
816 ATTR_LIMIT
817 };
818
819 void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode);
820
821 // Both lengths must be <0 or else both must be >=0.
822 UCollationResult doCompare(const UChar *left, int32_t leftLength,
823 const UChar *right, int32_t rightLength,
824 UErrorCode &errorCode) const;
825 UCollationResult doCompare(const uint8_t *left, int32_t leftLength,
826 const uint8_t *right, int32_t rightLength,
827 UErrorCode &errorCode) const;
828
829 void writeSortKey(const UChar *s, int32_t length,
830 SortKeyByteSink &sink, UErrorCode &errorCode) const;
831
832 void writeIdenticalLevel(const UChar *s, const UChar *limit,
833 SortKeyByteSink &sink, UErrorCode &errorCode) const;
834
835 const CollationSettings &getDefaultSettings() const;
836
// Clears bit number `attribute` in the explicitlySetAttributes mask,
// leaving all other bits unchanged.
// `attribute` is used as a shift count on a uint32_t, so it must be in 0..31.
837 void setAttributeDefault(int32_t attribute) {
838 explicitlySetAttributes &= ~((uint32_t)1 << attribute);
839 }
// Sets bit number `attribute` in the explicitlySetAttributes mask,
// leaving all other bits unchanged.
// `attribute` is used as a shift count on a uint32_t, so it must be in 0..31.
840 void setAttributeExplicitly(int32_t attribute) {
841 explicitlySetAttributes |= (uint32_t)1 << attribute;
842 }
// Reports whether bit number `attribute` is set in the explicitlySetAttributes mask.
// Returns a nonzero UBool when the bit is set and zero otherwise.
843 UBool attributeHasBeenSetExplicitly(int32_t attribute) const {
// The intended range check is left as a comment only; nothing here enforces it.
844 // assert(0 <= attribute < ATTR_LIMIT);
// Isolate the single bit and normalize the comparison result to UBool.
845 return (UBool)((explicitlySetAttributes & ((uint32_t)1 << attribute)) != 0);
846 }
847
855 UBool isUnsafe(UChar32 c) const;
856
857 static void computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode);
858 UBool initMaxExpansions(UErrorCode &errorCode) const;
859
860 void setFastLatinOptions(CollationSettings &ownedSettings) const;
861
862 const CollationData *data;
863 const CollationSettings *settings; // reference-counted
864 const CollationTailoring *tailoring; // alias of cacheEntry->tailoring
865 const CollationCacheEntry *cacheEntry; // reference-counted
866 Locale validLocale;
867 uint32_t explicitlySetAttributes;
868
869 UBool actualLocaleIsSameAsValid;
870};
871
873
874#endif // !UCONFIG_NO_COLLATION
875#endif // TBLCOLL_H
Abstract class that defines an API for iteration on text objects.
Definition chariter.h:356
The CollationElementIterator class is used as an iterator to walk through each character of an inte...
Definition coleitr.h:116
Collation keys are generated by the Collator class.
Definition sortkey.h:97
The Collator class performs locale-sensitive string comparison.
Definition coll.h:163
static Collator * fromUCollator(UCollator *uc)
Definition coll.h:1164
ECollationStrength
Base letter represents a primary difference.
Definition coll.h:194
virtual EComparisonResult compare(const UnicodeString &source, const UnicodeString &target) const
The comparison function compares the character data stored in two different strings.
"Smart pointer" base class; do not use directly: use LocalPointer etc.
A Locale object represents a specific geographical, political, or cultural region.
Definition locid.h:185
The RuleBasedCollator class provides the implementation of Collator, using data-driven tables.
Definition tblcoll.h:111
virtual CollationElementIterator * createCollationElementIterator(const UnicodeString &source) const
Creates a collation element iterator for the source string.
static UClassID getStaticClassID(void)
Returns the class ID for this class.
virtual UColAttributeValue getAttribute(UColAttribute attr, UErrorCode &status) const
Universal attribute getter.
static const RuleBasedCollator * rbcFromUCollator(const UCollator *uc)
Definition tblcoll.h:782
virtual UnicodeSet * getTailoredSet(UErrorCode &status) const
Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status)
Sets the variable top to the primary weight of the specified string.
void getRules(UColRuleOption delta, UnicodeString &buffer) const
Returns current rules.
virtual UCollationResult compare(const UnicodeString &source, const UnicodeString &target, int32_t length, UErrorCode &status) const
Does the same thing as compare but limits the comparison to a specified length.
void internalGetContractionsAndExpansions(UnicodeSet *contractions, UnicodeSet *expansions, UBool addPrefixes, UErrorCode &errorCode) const
Implements ucol_getContractionsAndExpansions().
static RuleBasedCollator * rbcFromUCollator(UCollator *uc)
Definition tblcoll.h:778
virtual UBool operator==(const Collator &other) const
Returns true if argument is the same as this object.
virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status)
Sets the variable top to the primary weight of the specified string.
virtual ~RuleBasedCollator()
Destructor.
RuleBasedCollator()
Only for use in ucol_openRules().
virtual UCollationResult compare(const UnicodeString &source, const UnicodeString &target, UErrorCode &status) const
The comparison function compares the character data stored in two different strings.
virtual CollationKey & getCollationKey(const UChar *source, int32_t sourceLength, CollationKey &key, UErrorCode &status) const
Transforms a specified region of the string into a series of characters that can be compared with Col...
virtual int32_t hashCode() const
Generates the hash code for the rule-based collation object.
RuleBasedCollator & operator=(const RuleBasedCollator &other)
Assignment operator.
RuleBasedCollator(const RuleBasedCollator &other)
Copy constructor.
uint8_t * cloneRuleData(int32_t &length, UErrorCode &status) const
Do not use this method: The caller and the ICU library might use different heaps.
virtual Collator & setMaxVariable(UColReorderCode group, UErrorCode &errorCode)
Sets the variable top to the top of the specified reordering group.
virtual UColReorderCode getMaxVariable() const
Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
virtual int32_t internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2], uint8_t *dest, int32_t count, UErrorCode &errorCode) const
Implements ucol_nextSortKeyPart().
int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const
Creates a binary image of a collator.
const UnicodeString & getRules() const
Gets the tailoring rules for this collator.
virtual CollationElementIterator * createCollationElementIterator(const CharacterIterator &source) const
Creates a collation element iterator for the source.
virtual void setVariableTop(uint32_t varTop, UErrorCode &status)
Sets the variable top to the specified primary weight.
RuleBasedCollator(const UnicodeString &rules, UErrorCode &status)
RuleBasedCollator constructor.
virtual UCollationResult internalCompareUTF8(const char *left, int32_t leftLength, const char *right, int32_t rightLength, UErrorCode &errorCode) const
Implements ucol_strcollUTF8().
void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const
Adds the contractions that start with character c to the set.
RuleBasedCollator(const uint8_t *bin, int32_t length, const RuleBasedCollator *base, UErrorCode &status)
Opens a collator from a collator binary image created using cloneBinary.
virtual int32_t getSortKey(const UnicodeString &source, uint8_t *result, int32_t resultLength) const
Get the sort key as an array of bytes from a UnicodeString.
int32_t getMaxExpansion(int32_t order) const
Returns the maximum length of any expansion sequences that end with the specified comparison order.
virtual uint32_t getVariableTop(UErrorCode &status) const
Gets the variable top value of a Collator.
virtual void setReorderCodes(const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode &status)
Sets the ordering of scripts for this collator.
virtual UCollationResult compare(const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength, UErrorCode &status) const
The comparison function compares the character data stored in two different string arrays.
RuleBasedCollator(const UnicodeString &rules, UColAttributeValue decompositionMode, UErrorCode &status)
RuleBasedCollator constructor.
virtual CollationKey & getCollationKey(const UnicodeString &source, CollationKey &key, UErrorCode &status) const
Transforms the string into a series of characters that can be compared with CollationKey....
void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const
Appends the CEs for the string to the vector.
RuleBasedCollator(const UnicodeString &rules, ECollationStrength collationStrength, UColAttributeValue decompositionMode, UErrorCode &status)
RuleBasedCollator constructor.
virtual Collator * clone(void) const
Makes a copy of this object.
void internalBuildTailoring(const UnicodeString &rules, int32_t strength, UColAttributeValue decompositionMode, UParseError *outParseError, UnicodeString *outReason, UErrorCode &errorCode)
Implements from-rule constructors, and ucol_openRules().
virtual void setAttribute(UColAttribute attr, UColAttributeValue value, UErrorCode &status)
Universal attribute setter.
virtual void setLocales(const Locale &requestedLocale, const Locale &validLocale, const Locale &actualLocale)
Used internally by registration to define the requested and valid locales.
virtual void getVersion(UVersionInfo info) const
Gets the version information for a Collator.
RuleBasedCollator(const UnicodeString &rules, UParseError &parseError, UnicodeString &reason, UErrorCode &errorCode)
TODO: document & propose as public API.
virtual int32_t getReorderCodes(int32_t *dest, int32_t destCapacity, UErrorCode &status) const
Retrieves the reordering codes for this collator.
virtual UCollationResult compare(UCharIterator &sIter, UCharIterator &tIter, UErrorCode &status) const
Compares two strings using the Collator.
virtual Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const
Gets the locale of the Collator.
const char * internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const
Implements ucol_getLocaleByType().
RuleBasedCollator(const UnicodeString &rules, ECollationStrength collationStrength, UErrorCode &status)
RuleBasedCollator constructor.
virtual int32_t getSortKey(const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) const
Get the sort key as an array of bytes from a UChar buffer.
virtual UClassID getDynamicClassID(void) const
Returns a unique class ID POLYMORPHICALLY.
virtual UCollationResult compareUTF8(const StringPiece &source, const StringPiece &target, UErrorCode &status) const
Compares two UTF-8 strings using the Collator.
virtual int32_t internalGetShortDefinitionString(const char *locale, char *buffer, int32_t capacity, UErrorCode &status) const
Get the short definition string for a collator.
A string-like object that points to a sized piece of memory.
Definition stringpiece.h:52
StringSearch is a SearchIterator that provides language-sensitive text searching based on the compari...
Definition stsearch.h:133
A mutable set of Unicode characters and multicharacter strings.
Definition uniset.h:276
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition unistr.h:294
C++ API: Collation Service.
C++ API: Locale ID object.
C API for code unit iteration.
Definition uiter.h:339
A UParseError struct is used to return detailed information about parsing errors.
Definition parseerr.h:56
C API: Collator.
struct UCollator UCollator
structure representing a collator object instance
Definition ucol.h:56
UColRuleOption
Options for retrieving the rule string.
Definition ucol.h:346
UColAttribute
Attributes that collation service understands.
Definition ucol.h:234
@ UCOL_ATTRIBUTE_COUNT
The number of UColAttribute constants.
Definition ucol.h:340
UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll(...
Definition ucol.h:71
UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Definition ucol.h:87
UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes.
Definition ucol.h:139
C API: Unicode Character Iteration.
ULocDataLocaleType
Constants for *_getLocale() Allow user to select whether she wants information on requested,...
Definition uloc.h:336
int8_t UBool
The ICU boolean type.
Definition umachine.h:234
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition umachine.h:312
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition utypes.h:476
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition utypes.h:358
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition uversion.h:130
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition uversion.h:57
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition uversion.h:129