ICU 57.1  57.1
uregex.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 2004-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: uregex.h
7 * encoding: US-ASCII
8 * indentation:4
9 *
10 * created on: 2004mar09
11 * created by: Andy Heninger
12 *
13 * ICU Regular Expressions, API for C
14 */
15 
23 #ifndef UREGEX_H
24 #define UREGEX_H
25 
26 #include "unicode/utext.h"
27 #include "unicode/utypes.h"
28 
29 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
30 
31 #include "unicode/localpointer.h"
32 #include "unicode/parseerr.h"
33 
34 struct URegularExpression;
41 
42 
47 typedef enum URegexpFlag{
48 
49 #ifndef U_HIDE_DRAFT_API
50 
54 #endif /* U_HIDE_DRAFT_API */
55 
57 
60 
64 
76 
82 
89 
97  UREGEX_UWORD = 256,
98 
107 
108 } URegexpFlag;
109 
132 U_STABLE URegularExpression * U_EXPORT2
133 uregex_open( const UChar *pattern,
134  int32_t patternLength,
135  uint32_t flags,
136  UParseError *pe,
137  UErrorCode *status);
138 
162 U_STABLE URegularExpression * U_EXPORT2
163 uregex_openUText(UText *pattern,
164  uint32_t flags,
165  UParseError *pe,
166  UErrorCode *status);
167 
191 #if !UCONFIG_NO_CONVERSION
192 U_STABLE URegularExpression * U_EXPORT2
193 uregex_openC( const char *pattern,
194  uint32_t flags,
195  UParseError *pe,
196  UErrorCode *status);
197 #endif
198 
199 
200 
208 U_STABLE void U_EXPORT2
210 
211 #if U_SHOW_CPLUSPLUS_API
212 
214 
225 
227 
228 #endif
229 
248 U_STABLE URegularExpression * U_EXPORT2
249 uregex_clone(const URegularExpression *regexp, UErrorCode *status);
250 
268 U_STABLE const UChar * U_EXPORT2
269 uregex_pattern(const URegularExpression *regexp,
270  int32_t *patLength,
271  UErrorCode *status);
272 
284 U_STABLE UText * U_EXPORT2
286  UErrorCode *status);
287 
296 U_STABLE int32_t U_EXPORT2
297 uregex_flags(const URegularExpression *regexp,
298  UErrorCode *status);
299 
300 
321 U_STABLE void U_EXPORT2
323  const UChar *text,
324  int32_t textLength,
325  UErrorCode *status);
326 
327 
344 U_STABLE void U_EXPORT2
346  UText *text,
347  UErrorCode *status);
348 
369 U_STABLE const UChar * U_EXPORT2
371  int32_t *textLength,
372  UErrorCode *status);
373 
390 U_STABLE UText * U_EXPORT2
392  UText *dest,
393  UErrorCode *status);
394 
420 U_STABLE void U_EXPORT2
422  UText *text,
423  UErrorCode *status);
424 
445 U_STABLE UBool U_EXPORT2
447  int32_t startIndex,
448  UErrorCode *status);
449 
471 U_STABLE UBool U_EXPORT2
473  int64_t startIndex,
474  UErrorCode *status);
475 
499 U_STABLE UBool U_EXPORT2
501  int32_t startIndex,
502  UErrorCode *status);
503 
528 U_STABLE UBool U_EXPORT2
530  int64_t startIndex,
531  UErrorCode *status);
532 
552 U_STABLE UBool U_EXPORT2
554  int32_t startIndex,
555  UErrorCode *status);
556 
577 U_STABLE UBool U_EXPORT2
579  int64_t startIndex,
580  UErrorCode *status);
581 
595 U_STABLE UBool U_EXPORT2
597  UErrorCode *status);
598 
606 U_STABLE int32_t U_EXPORT2
608  UErrorCode *status);
609 
626 U_STABLE int32_t U_EXPORT2
628  const UChar *groupName,
629  int32_t nameLength,
630  UErrorCode *status);
631 
632 
650 U_STABLE int32_t U_EXPORT2
652  const char *groupName,
653  int32_t nameLength,
654  UErrorCode *status);
655 
672 U_STABLE int32_t U_EXPORT2
674  int32_t groupNum,
675  UChar *dest,
676  int32_t destCapacity,
677  UErrorCode *status);
678 
701 U_STABLE UText * U_EXPORT2
703  int32_t groupNum,
704  UText *dest,
705  int64_t *groupLength,
706  UErrorCode *status);
707 
722 U_STABLE int32_t U_EXPORT2
724  int32_t groupNum,
725  UErrorCode *status);
726 
742 U_STABLE int64_t U_EXPORT2
744  int32_t groupNum,
745  UErrorCode *status);
746 
760 U_STABLE int32_t U_EXPORT2
762  int32_t groupNum,
763  UErrorCode *status);
764 
779 U_STABLE int64_t U_EXPORT2
781  int32_t groupNum,
782  UErrorCode *status);
783 
797 U_STABLE void U_EXPORT2
799  int32_t index,
800  UErrorCode *status);
801 
816 U_STABLE void U_EXPORT2
818  int64_t index,
819  UErrorCode *status);
820 
841 U_STABLE void U_EXPORT2
843  int32_t regionStart,
844  int32_t regionLimit,
845  UErrorCode *status);
846 
868 U_STABLE void U_EXPORT2
870  int64_t regionStart,
871  int64_t regionLimit,
872  UErrorCode *status);
873 
888 U_STABLE void U_EXPORT2
890  int64_t regionStart,
891  int64_t regionLimit,
892  int64_t startIndex,
893  UErrorCode *status);
894 
904 U_STABLE int32_t U_EXPORT2
906  UErrorCode *status);
907 
918 U_STABLE int64_t U_EXPORT2
920  UErrorCode *status);
921 
932 U_STABLE int32_t U_EXPORT2
934  UErrorCode *status);
935 
947 U_STABLE int64_t U_EXPORT2
949  UErrorCode *status);
950 
961 U_STABLE UBool U_EXPORT2
963  UErrorCode *status);
964 
965 
985 U_STABLE void U_EXPORT2
987  UBool b,
988  UErrorCode *status);
989 
990 
1000 U_STABLE UBool U_EXPORT2
1002  UErrorCode *status);
1003 
1004 
1018 U_STABLE void U_EXPORT2
1020  UBool b,
1021  UErrorCode *status);
1022 
1033 U_STABLE UBool U_EXPORT2
1034 uregex_hitEnd(const URegularExpression *regexp,
1035  UErrorCode *status);
1036 
1048 U_STABLE UBool U_EXPORT2
1050  UErrorCode *status);
1051 
1052 
1053 
1054 
1055 
1080 U_STABLE int32_t U_EXPORT2
1082  const UChar *replacementText,
1083  int32_t replacementLength,
1084  UChar *destBuf,
1085  int32_t destCapacity,
1086  UErrorCode *status);
1087 
1109 U_STABLE UText * U_EXPORT2
1111  UText *replacement,
1112  UText *dest,
1113  UErrorCode *status);
1114 
1139 U_STABLE int32_t U_EXPORT2
1141  const UChar *replacementText,
1142  int32_t replacementLength,
1143  UChar *destBuf,
1144  int32_t destCapacity,
1145  UErrorCode *status);
1146 
1168 U_STABLE UText * U_EXPORT2
1170  UText *replacement,
1171  UText *dest,
1172  UErrorCode *status);
1173 
1220 U_STABLE int32_t U_EXPORT2
1222  const UChar *replacementText,
1223  int32_t replacementLength,
1224  UChar **destBuf,
1225  int32_t *destCapacity,
1226  UErrorCode *status);
1227 
1250 U_STABLE void U_EXPORT2
1252  UText *replacementText,
1253  UText *dest,
1254  UErrorCode *status);
1255 
1280 U_STABLE int32_t U_EXPORT2
1282  UChar **destBuf,
1283  int32_t *destCapacity,
1284  UErrorCode *status);
1285 
1304 U_STABLE UText * U_EXPORT2
1306  UText *dest,
1307  UErrorCode *status);
1308 
1360 U_STABLE int32_t U_EXPORT2
1362  UChar *destBuf,
1363  int32_t destCapacity,
1364  int32_t *requiredCapacity,
1365  UChar *destFields[],
1366  int32_t destFieldsCapacity,
1367  UErrorCode *status);
1368 
1395 U_STABLE int32_t U_EXPORT2
1397  UText *destFields[],
1398  int32_t destFieldsCapacity,
1399  UErrorCode *status);
1400 
1423 U_STABLE void U_EXPORT2
1425  int32_t limit,
1426  UErrorCode *status);
1427 
1437 U_STABLE int32_t U_EXPORT2
1439  UErrorCode *status);
1440 
1461 U_STABLE void U_EXPORT2
1463  int32_t limit,
1464  UErrorCode *status);
1465 
1473 U_STABLE int32_t U_EXPORT2
1475  UErrorCode *status);
1476 
1477 
1498  const void *context,
1499  int32_t steps);
1501 
1516 U_STABLE void U_EXPORT2
1518  URegexMatchCallback *callback,
1519  const void *context,
1520  UErrorCode *status);
1521 
1522 
1534 U_STABLE void U_EXPORT2
1536  URegexMatchCallback **callback,
1537  const void **context,
1538  UErrorCode *status);
1539 
1572  const void *context,
1573  int64_t matchIndex);
1575 
1576 
1588 U_STABLE void U_EXPORT2
1590  URegexFindProgressCallback *callback,
1591  const void *context,
1592  UErrorCode *status);
1593 
1605 U_STABLE void U_EXPORT2
1607  URegexFindProgressCallback **callback,
1608  const void **context,
1609  UErrorCode *status);
1610 
1611 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
1612 #endif /* UREGEX_H */
uregex_groupCount
int32_t uregex_groupCount(URegularExpression *regexp, UErrorCode *status)
Get the number of capturing groups in this regular expression's pattern.
uregex_openUText
URegularExpression * uregex_openUText(UText *pattern, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
uregex_hitEnd
UBool uregex_hitEnd(const URegularExpression *regexp, UErrorCode *status)
Return TRUE if the most recent matching operation touched the end of the text being processed.
uregex_reset
void uregex_reset(URegularExpression *regexp, int32_t index, UErrorCode *status)
Reset any saved state from the previous match.
uregex_split
int32_t uregex_split(URegularExpression *regexp, UChar *destBuf, int32_t destCapacity, int32_t *requiredCapacity, UChar *destFields[], int32_t destFieldsCapacity, UErrorCode *status)
Split a string into fields.
URegexMatchCallback
UBool URegexMatchCallback(const void *context, int32_t steps)
Function pointer for a regular expression matching callback function.
Definition: uregex.h:1497
parseerr.h
C API: Parse Error Information.
uregex_groupNumberFromCName
int32_t uregex_groupNumberFromCName(URegularExpression *regexp, const char *groupName, int32_t nameLength, UErrorCode *status)
Get the group number corresponding to a named capture group.
uregex_close
void uregex_close(URegularExpression *regexp)
Close the regular expression, recovering all resources (memory) it was holding.
UREGEX_UNIX_LINES
@ UREGEX_UNIX_LINES
Unix-only line endings.
Definition: uregex.h:88
uregex_replaceAllUText
UText * uregex_replaceAllUText(URegularExpression *regexp, UText *replacement, UText *dest, UErrorCode *status)
Replaces every substring of the input that matches the pattern with the given replacement string.
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
uregex_openC
URegularExpression * uregex_openC(const char *pattern, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
UREGEX_CASE_INSENSITIVE
@ UREGEX_CASE_INSENSITIVE
Enable case insensitive matching.
Definition: uregex.h:56
uregex_requireEnd
UBool uregex_requireEnd(const URegularExpression *regexp, UErrorCode *status)
Return TRUE if the most recent match succeeded and additional input could cause it to fail.
uregex_setFindProgressCallback
void uregex_setFindProgressCallback(URegularExpression *regexp, URegexFindProgressCallback *callback, const void *context, UErrorCode *status)
Set the find progress callback function for this URegularExpression.
uregex_getUText
UText * uregex_getUText(URegularExpression *regexp, UText *dest, UErrorCode *status)
Get the subject text that is currently associated with this regular expression object.
UBool
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
uregex_regionStart
int32_t uregex_regionStart(const URegularExpression *regexp, UErrorCode *status)
Reports the start index of the matching region.
uregex_groupUText
UText * uregex_groupUText(URegularExpression *regexp, int32_t groupNum, UText *dest, int64_t *groupLength, UErrorCode *status)
Returns a shallow immutable clone of the entire input string with the current index set to the beginn...
uregex_getStackLimit
int32_t uregex_getStackLimit(const URegularExpression *regexp, UErrorCode *status)
Get the size of the heap storage available for use by the backtracking stack.
uregex_matches64
UBool uregex_matches64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_matches.
uregex_end64
int64_t uregex_end64(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
64bit version of uregex_end.
U_CALLCONV
#define U_CALLCONV
Definition: platform.h:849
utext.h
C API: Abstract Unicode Text API.
uregex_setRegionAndStart
void uregex_setRegionAndStart(URegularExpression *regexp, int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode *status)
Set the matching region and the starting index for subsequent matches in a single operation.
uregex_find64
UBool uregex_find64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_find.
uregex_refreshUText
void uregex_refreshUText(URegularExpression *regexp, UText *text, UErrorCode *status)
Set the subject text string upon which the regular expression is looking for matches without changing...
uregex_useAnchoringBounds
void uregex_useAnchoringBounds(URegularExpression *regexp, UBool b, UErrorCode *status)
Set whether this URegularExpression is using Anchoring Bounds for its region.
uregex_getText
const UChar * uregex_getText(URegularExpression *regexp, int32_t *textLength, UErrorCode *status)
Get the subject text that is currently associated with this regular expression object.
uregex_setTimeLimit
void uregex_setTimeLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status)
Set a processing time limit for match operations with this URegularExpression.
uregex_setStackLimit
void uregex_setStackLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status)
Set the amount of heap storage available for use by the match backtracking stack.
LocalURegularExpressionPointer
UParseError
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
uregex_hasTransparentBounds
UBool uregex_hasTransparentBounds(const URegularExpression *regexp, UErrorCode *status)
Queries the transparency of region bounds for this URegularExpression.
UREGEX_LITERAL
@ UREGEX_LITERAL
If set, treat the entire pattern as a literal string.
Definition: uregex.h:75
uregex_regionEnd
int32_t uregex_regionEnd(const URegularExpression *regexp, UErrorCode *status)
Reports the end index (exclusive) of the matching region for this URegularExpression.
uregex_pattern
const UChar * uregex_pattern(const URegularExpression *regexp, int32_t *patLength, UErrorCode *status)
Returns a pointer to the source form of the pattern for this regular expression.
uregex_groupNumberFromName
int32_t uregex_groupNumberFromName(URegularExpression *regexp, const UChar *groupName, int32_t nameLength, UErrorCode *status)
Get the group number corresponding to a named capture group.
uregex_find
UBool uregex_find(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Find the first matching substring of the input string that matches the pattern.
uregex_replaceFirstUText
UText * uregex_replaceFirstUText(URegularExpression *regexp, UText *replacement, UText *dest, UErrorCode *status)
Replaces the first substring of the input that matches the pattern with the given replacement string.
UErrorCode
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:476
UREGEX_CANON_EQ
@ UREGEX_CANON_EQ
Forces normalization of pattern and strings.
Definition: uregex.h:53
uregex_start64
int64_t uregex_start64(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
64bit version of uregex_start.
uregex_setMatchCallback
void uregex_setMatchCallback(URegularExpression *regexp, URegexMatchCallback *callback, const void *context, UErrorCode *status)
Set a callback function for this URegularExpression.
uregex_clone
URegularExpression * uregex_clone(const URegularExpression *regexp, UErrorCode *status)
Make a copy of a compiled regular expression.
URegexpFlag
URegexpFlag
Constants for Regular Expression Match Modes.
Definition: uregex.h:47
uregex_group
int32_t uregex_group(URegularExpression *regexp, int32_t groupNum, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract the string for the specified matching expression or subexpression.
uregex_setRegion64
void uregex_setRegion64(URegularExpression *regexp, int64_t regionStart, int64_t regionLimit, UErrorCode *status)
64bit version of uregex_setRegion.
uregex_regionStart64
int64_t uregex_regionStart64(const URegularExpression *regexp, UErrorCode *status)
64bit version of uregex_regionStart.
U_CDECL_END
#define U_CDECL_END
Definition: umachine.h:83
U_CDECL_BEGIN
#define U_CDECL_BEGIN
Definition: umachine.h:82
uregex_splitUText
int32_t uregex_splitUText(URegularExpression *regexp, UText *destFields[], int32_t destFieldsCapacity, UErrorCode *status)
Split a string into fields.
uregex_lookingAt
UBool uregex_lookingAt(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Attempts to match the input string, starting from the specified index, against the pattern.
UText
UText struct.
Definition: utext.h:1343
uregex_setUText
void uregex_setUText(URegularExpression *regexp, UText *text, UErrorCode *status)
Set the subject text string upon which the regular expression will look for matches.
uregex_matches
UBool uregex_matches(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Attempts to match the input string against the pattern.
uregex_open
URegularExpression * uregex_open(const UChar *pattern, int32_t patternLength, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
uregex_appendReplacement
int32_t uregex_appendReplacement(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar **destBuf, int32_t *destCapacity, UErrorCode *status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
UREGEX_DOTALL
@ UREGEX_DOTALL
If set, '.' matches line terminators, otherwise '.' matching stops at line end.
Definition: uregex.h:63
uregex_findNext
UBool uregex_findNext(URegularExpression *regexp, UErrorCode *status)
Find the next pattern match in the input string.
U_DEFINE_LOCAL_OPEN_POINTER
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
Definition: localpointer.h:539
URegularExpression
struct URegularExpression URegularExpression
Structure representing a compiled regular expression, plus the results of a match operation.
Definition: uregex.h:40
UREGEX_MULTILINE
@ UREGEX_MULTILINE
Control behavior of "$" and "^". If set, recognize line terminators within string, otherwise,...
Definition: uregex.h:81
uregex_start
int32_t uregex_start(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
Returns the index in the input string of the start of the text matched by the specified capture group...
uregex_getFindProgressCallback
void uregex_getFindProgressCallback(const URegularExpression *regexp, URegexFindProgressCallback **callback, const void **context, UErrorCode *status)
Get the find progress callback function for this URegularExpression.
uregex_end
int32_t uregex_end(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
Returns the index in the input string of the position following the end of the text matched by the sp...
localpointer.h
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
uregex_replaceFirst
int32_t uregex_replaceFirst(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status)
Replaces the first substring of the input that matches the pattern with the given replacement string.
UREGEX_UWORD
@ UREGEX_UWORD
Unicode word boundaries.
Definition: uregex.h:97
uregex_appendTailUText
UText * uregex_appendTailUText(URegularExpression *regexp, UText *dest, UErrorCode *status)
As the final step in a find-and-replace operation, append the remainder of the input string,...
U_STABLE
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:109
uregex_getTimeLimit
int32_t uregex_getTimeLimit(const URegularExpression *regexp, UErrorCode *status)
Get the time limit for matches with this URegularExpression.
uregex_setRegion
void uregex_setRegion(URegularExpression *regexp, int32_t regionStart, int32_t regionLimit, UErrorCode *status)
Sets the limits of the matching region for this URegularExpression.
uregex_flags
int32_t uregex_flags(const URegularExpression *regexp, UErrorCode *status)
Get the match mode flags that were specified when compiling this regular expression.
uregex_appendTail
int32_t uregex_appendTail(URegularExpression *regexp, UChar **destBuf, int32_t *destCapacity, UErrorCode *status)
As the final step in a find-and-replace operation, append the remainder of the input string,...
uregex_hasAnchoringBounds
UBool uregex_hasAnchoringBounds(const URegularExpression *regexp, UErrorCode *status)
Return true if this URegularExpression is using anchoring bounds.
uregex_getMatchCallback
void uregex_getMatchCallback(const URegularExpression *regexp, URegexMatchCallback **callback, const void **context, UErrorCode *status)
Get the callback function for this URegularExpression.
uregex_appendReplacementUText
void uregex_appendReplacementUText(URegularExpression *regexp, UText *replacementText, UText *dest, UErrorCode *status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
uregex_patternUText
UText * uregex_patternUText(const URegularExpression *regexp, UErrorCode *status)
Returns the source text of the pattern for this regular expression.
UChar
uint16_t UChar
Definition: umachine.h:312
uregex_setText
void uregex_setText(URegularExpression *regexp, const UChar *text, int32_t textLength, UErrorCode *status)
Set the subject text string upon which the regular expression will look for matches.
uregex_reset64
void uregex_reset64(URegularExpression *regexp, int64_t index, UErrorCode *status)
64bit version of uregex_reset.
U_NAMESPACE_END
#define U_NAMESPACE_END
Definition: uversion.h:130
U_NAMESPACE_BEGIN
#define U_NAMESPACE_BEGIN
Definition: uversion.h:129
UREGEX_COMMENTS
@ UREGEX_COMMENTS
Allow white space and comments within patterns.
Definition: uregex.h:59
uregex_regionEnd64
int64_t uregex_regionEnd64(const URegularExpression *regexp, UErrorCode *status)
64bit version of uregex_regionEnd.
URegexFindProgressCallback
UBool URegexFindProgressCallback(const void *context, int64_t matchIndex)
Function pointer for a regular expression find callback function.
Definition: uregex.h:1571
UREGEX_ERROR_ON_UNKNOWN_ESCAPES
@ UREGEX_ERROR_ON_UNKNOWN_ESCAPES
Error on Unrecognized backslash escapes.
Definition: uregex.h:106
uregex_useTransparentBounds
void uregex_useTransparentBounds(URegularExpression *regexp, UBool b, UErrorCode *status)
Sets the transparency of region bounds for this URegularExpression.
uregex_replaceAll
int32_t uregex_replaceAll(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status)
Replaces every substring of the input that matches the pattern with the given replacement string.
uregex_lookingAt64
UBool uregex_lookingAt64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_lookingAt.