ICU 65.1
uset.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 2002-2014, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: uset.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2002mar07
16* created by: Markus W. Scherer
17*
18* C version of UnicodeSet.
19*/
20
21
29#ifndef __USET_H__
30#define __USET_H__
31
32#include "unicode/utypes.h"
33#include "unicode/uchar.h"
35
36#ifndef USET_DEFINED
37
38#ifndef U_IN_DOXYGEN
39#define USET_DEFINED
40#endif
47typedef struct USet USet;
48#endif
49
55enum {
61
89
99};
100
156typedef enum USetSpanCondition {
205#ifndef U_HIDE_DEPRECATED_API
211#endif // U_HIDE_DEPRECATED_API
213
214enum {
223
251
252/*********************************************************************
253 * USet API
254 *********************************************************************/
255
263U_STABLE USet* U_EXPORT2
265
276U_STABLE USet* U_EXPORT2
278
288U_STABLE USet* U_EXPORT2
289uset_openPattern(const UChar* pattern, int32_t patternLength,
290 UErrorCode* ec);
291
303U_STABLE USet* U_EXPORT2
304uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
305 uint32_t options,
306 UErrorCode* ec);
307
314U_STABLE void U_EXPORT2
316
317#if U_SHOW_CPLUSPLUS_API
318
319U_NAMESPACE_BEGIN
320
331
332U_NAMESPACE_END
333
334#endif
335
345U_STABLE USet * U_EXPORT2
346uset_clone(const USet *set);
347
357U_STABLE UBool U_EXPORT2
358uset_isFrozen(const USet *set);
359
374U_STABLE void U_EXPORT2
376
387U_STABLE USet * U_EXPORT2
389
399U_STABLE void U_EXPORT2
401 UChar32 start, UChar32 end);
402
424U_STABLE int32_t U_EXPORT2
426 const UChar *pattern, int32_t patternLength,
427 uint32_t options,
428 UErrorCode *status);
429
452U_STABLE void U_EXPORT2
454 UProperty prop, int32_t value, UErrorCode* ec);
455
491U_STABLE void U_EXPORT2
493 const UChar *prop, int32_t propLength,
494 const UChar *value, int32_t valueLength,
495 UErrorCode* ec);
496
506U_STABLE UBool U_EXPORT2
507uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
508 int32_t pos);
509
525U_STABLE int32_t U_EXPORT2
527 UChar* result, int32_t resultCapacity,
528 UBool escapeUnprintable,
529 UErrorCode* ec);
530
539U_STABLE void U_EXPORT2
541
554U_STABLE void U_EXPORT2
555uset_addAll(USet* set, const USet *additionalSet);
556
566U_STABLE void U_EXPORT2
568
578U_STABLE void U_EXPORT2
579uset_addString(USet* set, const UChar* str, int32_t strLen);
580
590U_STABLE void U_EXPORT2
591uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
592
601U_STABLE void U_EXPORT2
603
613U_STABLE void U_EXPORT2
615
625U_STABLE void U_EXPORT2
626uset_removeString(USet* set, const UChar* str, int32_t strLen);
627
639U_STABLE void U_EXPORT2
640uset_removeAll(USet* set, const USet* removeSet);
641
656U_STABLE void U_EXPORT2
657uset_retain(USet* set, UChar32 start, UChar32 end);
658
671U_STABLE void U_EXPORT2
672uset_retainAll(USet* set, const USet* retain);
673
682U_STABLE void U_EXPORT2
684
693U_STABLE void U_EXPORT2
695
707U_STABLE void U_EXPORT2
708uset_complementAll(USet* set, const USet* complement);
709
717U_STABLE void U_EXPORT2
719
746U_STABLE void U_EXPORT2
747uset_closeOver(USet* set, int32_t attributes);
748
755U_STABLE void U_EXPORT2
757
765U_STABLE UBool U_EXPORT2
766uset_isEmpty(const USet* set);
767
776U_STABLE UBool U_EXPORT2
778
788U_STABLE UBool U_EXPORT2
789uset_containsRange(const USet* set, UChar32 start, UChar32 end);
790
799U_STABLE UBool U_EXPORT2
800uset_containsString(const USet* set, const UChar* str, int32_t strLen);
801
812U_STABLE int32_t U_EXPORT2
813uset_indexOf(const USet* set, UChar32 c);
814
825U_STABLE UChar32 U_EXPORT2
826uset_charAt(const USet* set, int32_t charIndex);
827
836U_STABLE int32_t U_EXPORT2
837uset_size(const USet* set);
838
847U_STABLE int32_t U_EXPORT2
849
868U_STABLE int32_t U_EXPORT2
869uset_getItem(const USet* set, int32_t itemIndex,
870 UChar32* start, UChar32* end,
871 UChar* str, int32_t strCapacity,
872 UErrorCode* ec);
873
882U_STABLE UBool U_EXPORT2
883uset_containsAll(const USet* set1, const USet* set2);
884
895U_STABLE UBool U_EXPORT2
896uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
897
906U_STABLE UBool U_EXPORT2
907uset_containsNone(const USet* set1, const USet* set2);
908
917U_STABLE UBool U_EXPORT2
918uset_containsSome(const USet* set1, const USet* set2);
919
939U_STABLE int32_t U_EXPORT2
940uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
941
960U_STABLE int32_t U_EXPORT2
961uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
962
982U_STABLE int32_t U_EXPORT2
983uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
984
1003U_STABLE int32_t U_EXPORT2
1004uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1005
1014U_STABLE UBool U_EXPORT2
1015uset_equals(const USet* set1, const USet* set2);
1016
1017/*********************************************************************
1018 * Serialized set API
1019 *********************************************************************/
1020
1070U_STABLE int32_t U_EXPORT2
1071uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1072
1081U_STABLE UBool U_EXPORT2
1082uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1083
1091U_STABLE void U_EXPORT2
1093
1102U_STABLE UBool U_EXPORT2
1104
1114U_STABLE int32_t U_EXPORT2
1116
1130U_STABLE UBool U_EXPORT2
1131uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1132 UChar32* pStart, UChar32* pEnd);
1133
1134#endif
"Smart pointer" class, closes a USet via uset_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
A serialized form of a Unicode set.
Definition uset.h:229
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition uset.h:249
int32_t bmpLength
The length of the array that contains BMP characters.
Definition uset.h:239
const uint16_t * array
The serialized Unicode Set.
Definition uset.h:234
int32_t length
The total length of the array.
Definition uset.h:244
C API: Unicode Properties.
UProperty
Selection constants for Unicode properties.
Definition uchar.h:195
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:425
int8_t UBool
The ICU boolean type.
Definition umachine.h:261
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition umachine.h:378
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition umachine.h:111
UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
@ USET_IGNORE_SPACE
Ignore white space within patterns unless quoted or escaped.
Definition uset.h:60
@ USET_ADD_CASE_MAPPINGS
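Enable case insensitive matching by adding the lower-, title-, and uppercase mappings as well as the case folding of each existing element in the set.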
Definition uset.h:98
@ USET_CASE_INSENSITIVE
Enable case insensitive matching.
Definition uset.h:88
void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
void uset_clear(USet *set)
Removes all of the elements from this set.
int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns TRUE if the given USet contains the given string.
UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
UBool uset_isEmpty(const USet *set)
Returns TRUE if the given USet contains no characters and no strings.
USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns TRUE if the given USerializedSet contains the given character.
int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set.
void uset_removeAllStrings(USet *set)
Remove all strings from this set.
int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition uset.h:156
@ USET_SPAN_NOT_CONTAINED
Continues a span() while there is no set element at the current position.
Definition uset.h:169
@ USET_SPAN_CONTAINED
Spans the longest substring that is a concatenation of set elements (characters or strings).
Definition uset.h:184
@ USET_SPAN_CONDITION_COUNT
One more than the last span condition.
Definition uset.h:210
@ USET_SPAN_SIMPLE
Continues a span() while there is a set element at the current position.
Definition uset.h:204
void uset_complement(USet *set)
Inverts this set.
void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property.
void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
USet * uset_clone(const USet *set)
Returns a copy of this object.
void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in the given USet.
void uset_freeze(USet *set)
Freeze the set (make it immutable).
int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
@ USET_SERIALIZED_STATIC_ARRAY_CAPACITY
Capacity of USerializedSet::staticArray.
Definition uset.h:221
void uset_close(USet *set)
Disposes of the storage used by a USet object.
UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns TRUE if the given USet contains all characters c where start <= c && c <= end.
UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition uset.h:47
int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
USet * uset_openEmpty(void)
Create an empty USet object.
void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
UBool uset_contains(const USet *set, UChar32 c)
Returns TRUE if the given USet contains the given character.
void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:415