ICU 57.1
uset.h
Go to the documentation of this file.
1/*
2*******************************************************************************
3*
4* Copyright (C) 2002-2014, International Business Machines
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: uset.h
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2002mar07
14* created by: Markus W. Scherer
15*
16* C version of UnicodeSet.
17*/
18
19
27#ifndef __USET_H__
28#define __USET_H__
29
30#include "unicode/utypes.h"
31#include "unicode/uchar.h"
33
34#ifndef UCNV_H
35struct USet;
41typedef struct USet USet;
42#endif
43
49enum {
55
83
93};
94
205
206enum {
215
243
244/*********************************************************************
245 * USet API
246 *********************************************************************/
247
257
270
281uset_openPattern(const UChar* pattern, int32_t patternLength,
282 UErrorCode* ec);
283
297 uint32_t options,
298 UErrorCode* ec);
299
308
309#if U_SHOW_CPLUSPLUS_API
310
312
323
325
326#endif
327
338uset_clone(const USet *set);
339
350uset_isFrozen(const USet *set);
351
368
381
393 UChar32 start, UChar32 end);
394
416U_STABLE int32_t U_EXPORT2
418 const UChar *pattern, int32_t patternLength,
419 uint32_t options,
421
446 UProperty prop, int32_t value, UErrorCode* ec);
447
485 const UChar *prop, int32_t propLength,
486 const UChar *value, int32_t valueLength,
487 UErrorCode* ec);
488
500 int32_t pos);
501
517U_STABLE int32_t U_EXPORT2
519 UChar* result, int32_t resultCapacity,
521 UErrorCode* ec);
522
533
548
560
571uset_addString(USet* set, const UChar* str, int32_t strLen);
572
583uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
584
595
607
618uset_removeString(USet* set, const UChar* str, int32_t strLen);
619
633
649uset_retain(USet* set, UChar32 start, UChar32 end);
650
664uset_retainAll(USet* set, const USet* retain);
665
676
687
700uset_complementAll(USet* set, const USet* complement);
701
711
740
749
758uset_isEmpty(const USet* set);
759
770
781uset_containsRange(const USet* set, UChar32 start, UChar32 end);
782
792uset_containsString(const USet* set, const UChar* str, int32_t strLen);
793
804U_STABLE int32_t U_EXPORT2
805uset_indexOf(const USet* set, UChar32 c);
806
818uset_charAt(const USet* set, int32_t charIndex);
819
828U_STABLE int32_t U_EXPORT2
829uset_size(const USet* set);
830
839U_STABLE int32_t U_EXPORT2
841
860U_STABLE int32_t U_EXPORT2
861uset_getItem(const USet* set, int32_t itemIndex,
862 UChar32* start, UChar32* end,
863 UChar* str, int32_t strCapacity,
864 UErrorCode* ec);
865
876
888uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
889
900
911
931U_STABLE int32_t U_EXPORT2
932uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
933
952U_STABLE int32_t U_EXPORT2
953uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
954
974U_STABLE int32_t U_EXPORT2
975uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
976
995U_STABLE int32_t U_EXPORT2
996uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
997
1008
1009/*********************************************************************
1010 * Serialized set API
1011 *********************************************************************/
1012
1062U_STABLE int32_t U_EXPORT2
1063uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1064
1075
1085
1096
1106U_STABLE int32_t U_EXPORT2
1108
1125
1126#endif
"Smart pointer" base class; do not use directly: use LocalPointer etc.
"Smart pointer" class, closes a USet via uset_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
A serialized form of a Unicode set.
Definition uset.h:221
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition uset.h:241
int32_t bmpLength
The length of the array that contains BMP characters.
Definition uset.h:231
const uint16_t * array
The serialized Unicode Set.
Definition uset.h:226
int32_t length
The total length of the array.
Definition uset.h:236
C API: Unicode Properties.
UProperty
Selection constants for Unicode properties.
Definition uchar.h:161
struct USet USet
Definition ucnv.h:67
int8_t UBool
The ICU boolean type.
Definition umachine.h:234
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition umachine.h:312
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition umachine.h:109
UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
void uset_clear(USet *set)
Removes all of the elements from this set.
int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns TRUE if the given USet contains the given string.
UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
UBool uset_isEmpty(const USet *set)
Returns TRUE if the given USet contains no characters and no strings.
USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns TRUE if the given USerializedSet contains the given character.
int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set.
void uset_removeAllStrings(USet *set)
Remove all strings from this set.
int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition uset.h:150
@ USET_SPAN_NOT_CONTAINED
Continues a span() while there is no set element at the current position.
Definition uset.h:163
@ USET_SPAN_CONTAINED
Spans the longest substring that is a concatenation of set elements (characters or strings).
Definition uset.h:178
@ USET_SPAN_CONDITION_COUNT
One more than the last span condition.
Definition uset.h:203
@ USET_SPAN_SIMPLE
Continues a span() while there is a set element at the current position.
Definition uset.h:198
void uset_complement(USet *set)
Inverts this set.
void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property.
void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
USet * uset_clone(const USet *set)
Returns a copy of this object.
void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in the given USet.
void uset_freeze(USet *set)
Freeze the set (make it immutable).
int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
@ USET_SERIALIZED_STATIC_ARRAY_CAPACITY
Capacity of USerializedSet::staticArray.
Definition uset.h:213
UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
@ USET_IGNORE_SPACE
Ignore white space within patterns unless quoted or escaped.
Definition uset.h:54
@ USET_ADD_CASE_MAPPINGS
Enable case insensitive matching.
Definition uset.h:92
@ USET_CASE_INSENSITIVE
Enable case insensitive matching.
Definition uset.h:82
void uset_close(USet *set)
Disposes of the storage used by a USet object.
UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns TRUE if the given USet contains all characters c where start <= c && c <= end.
UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
USet * uset_openEmpty(void)
Create an empty USet object.
void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
UBool uset_contains(const USet *set, UChar32 c)
Returns TRUE if the given USet contains the given character.
void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition utypes.h:476
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition uversion.h:129