ICU 57.1 57.1
utext.h
Go to the documentation of this file.
1/*
2*******************************************************************************
3*
4* Copyright (C) 2004-2012, International Business Machines
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: utext.h
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2004oct06
14* created by: Markus W. Scherer
15*/
16
17#ifndef __UTEXT_H__
18#define __UTEXT_H__
19
135
136
137
138#include "unicode/utypes.h"
139#include "unicode/uchar.h"
140#if U_SHOW_CPLUSPLUS_API
141#include "unicode/localpointer.h"
142#include "unicode/rep.h"
143#include "unicode/unistr.h"
144#include "unicode/chariter.h"
145#endif
146
147
149
/* Forward declaration and typedef of UText, so that the declarations that
 * follow can take UText * before the struct itself is defined further down. */
150struct UText;
151typedef struct UText UText;
152
153
154/***************************************************************************************
155 *
156 * C Functions for creating UText wrappers around various kinds of text strings.
157 *
158 ****************************************************************************************/
159
160
181U_STABLE UText * U_EXPORT2
183
184#if U_SHOW_CPLUSPLUS_API
185
187
198
200
201#endif
202
/** Open a read-only UText implementation for UTF-8 strings. */
224U_STABLE UText * U_EXPORT2
225utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
226
227
/** Open a read-only UText for a UChar * string. */
242U_STABLE UText * U_EXPORT2
243utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
244
245
246#if U_SHOW_CPLUSPLUS_API
/** Open a writable UText for a non-const UnicodeString.
 *  Declared only when U_SHOW_CPLUSPLUS_API is set. */
259U_STABLE UText * U_EXPORT2
260utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status);
261
262
/** Open a UText for a const UnicodeString.
 *  Declared only when U_SHOW_CPLUSPLUS_API is set. */
275U_STABLE UText * U_EXPORT2
276utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status);
277
278
/** Open a writable UText implementation for an ICU Replaceable object.
 *  Declared only when U_SHOW_CPLUSPLUS_API is set. */
291U_STABLE UText * U_EXPORT2
292utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status);
293
/** Open a UText implementation over an ICU CharacterIterator.
 *  Declared only when U_SHOW_CPLUSPLUS_API is set. */
306U_STABLE UText * U_EXPORT2
307utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status);
308
309#endif
310
311
/** Clone a UText. */
369U_STABLE UText * U_EXPORT2
370utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
371
372
/** Compare two UText objects for equality. */
384U_STABLE UBool U_EXPORT2
385utext_equals(const UText *a, const UText *b);
386
387
388/*****************************************************************************
389 *
390 * Functions to work with the text represented by a UText wrapper
391 *
392 *****************************************************************************/
393
405U_STABLE int64_t U_EXPORT2
407
421U_STABLE UBool U_EXPORT2
423
/** Returns the code point at the requested index, or U_SENTINEL (-1) if it
 *  is out of bounds. */
449U_STABLE UChar32 U_EXPORT2
450utext_char32At(UText *ut, int64_t nativeIndex);
451
452
463U_STABLE UChar32 U_EXPORT2
465
466
485U_STABLE UChar32 U_EXPORT2
487
488
506U_STABLE UChar32 U_EXPORT2
508
509
/** Set the iteration index and return the code point at that index. */
528U_STABLE UChar32 U_EXPORT2
529utext_next32From(UText *ut, int64_t nativeIndex);
530
531
532
/** Set the iteration index, and return the code point preceding the one
 *  specified by the initial index. */
548U_STABLE UChar32 U_EXPORT2
549utext_previous32From(UText *ut, int64_t nativeIndex);
550
563U_STABLE int64_t U_EXPORT2
565
/** Set the current iteration position to the nearest code point boundary
 *  at or preceding the specified index. */
589U_STABLE void U_EXPORT2
590utext_setNativeIndex(UText *ut, int64_t nativeIndex);
591
/** Move the iterator position by delta code points. */
608U_STABLE UBool U_EXPORT2
609utext_moveIndex32(UText *ut, int32_t delta);
610
633U_STABLE int64_t U_EXPORT2
635
636
671U_STABLE int32_t U_EXPORT2
673 int64_t nativeStart, int64_t nativeLimit,
674 UChar *dest, int32_t destCapacity,
675 UErrorCode *status);
676
677
678
679/************************************************************************************
680 *
681 * #define inline versions of selected performance-critical text access functions
682 * Caution: do not use auto increment++ or decrement-- expressions
683 * as parameters to these macros.
684 *
685 * For most use, where there is no extreme performance constraint, the
686 * normal, non-inline functions are a better choice. The resulting code
687 * will be smaller, and, if the need ever arises, easier to debug.
688 *
689 * These are implemented as #defines rather than real functions
690 * because there is no fully portable way to do inline functions in plain C.
691 *
692 ************************************************************************************/
693
694#ifndef U_HIDE_INTERNAL_API
/*
 * Inline form of utext_current32(); internal API (compiled out when
 * U_HIDE_INTERNAL_API is defined).
 * Fast path: when chunkOffset is inside the chunk and the UTF-16 code unit
 * there is below 0xd800, the expression yields that code unit directly and
 * leaves chunkOffset unchanged. Any other case calls utext_current32(ut).
 * The argument is evaluated more than once; pass a side-effect-free expression.
 */
704#define UTEXT_CURRENT32(ut) \
705 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
706 ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
707#endif /* U_HIDE_INTERNAL_API */
708
/*
 * Inline form of utext_next32().
 * Fast path: when chunkOffset is inside the chunk and the UTF-16 code unit
 * there is below 0xd800, the expression yields that code unit and
 * post-increments chunkOffset. Any other case calls utext_next32(ut).
 * The argument is evaluated more than once; pass a side-effect-free expression.
 */
720#define UTEXT_NEXT32(ut) \
721 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
722 ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
723
/*
 * Inline form of utext_previous32().
 * Fast path: when chunkOffset is greater than 0 and the UTF-16 code unit just
 * before it is below 0xd800, the expression pre-decrements chunkOffset and
 * yields that code unit. Any other case calls utext_previous32(ut).
 * The argument is evaluated more than once; pass a side-effect-free expression.
 */
734#define UTEXT_PREVIOUS32(ut) \
735 ((ut)->chunkOffset > 0 && \
736 (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
737 (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
738
/*
 * Inline computation of the current native index.
 * While chunkOffset does not exceed nativeIndexingLimit, the result is
 * chunkNativeStart + chunkOffset; otherwise the provider's
 * pFuncs->mapOffsetToNative(ut) is called to do the mapping.
 * The argument is evaluated more than once; pass a side-effect-free expression.
 */
751#define UTEXT_GETNATIVEINDEX(ut) \
752 ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
753 (ut)->chunkNativeStart+(ut)->chunkOffset : \
754 (ut)->pFuncs->mapOffsetToNative(ut))
755
/*
 * Inline form of utext_setNativeIndex(): set the iteration position of ut
 * to native index ix.
 *
 * Fast path: when ix maps to an offset inside the part of the current chunk
 * where native and UTF-16 indexes correspond (offset < nativeIndexingLimit)
 * and the code unit at that offset is below 0xdc00 (so it cannot be a trail
 * surrogate, i.e. the offset is already on a code point boundary),
 * chunkOffset is updated directly. Every other case, including an offset
 * exactly at the limit, where there is no code unit to inspect, is delegated
 * to utext_setNativeIndex(), which adjusts to a code point boundary.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe in an unbraced if/else); invoke it with a trailing ';'.
 * Both arguments are evaluated exactly once. Local names avoid the reserved
 * double-underscore prefix.
 */
#define UTEXT_SETNATIVEINDEX(ut, ix) \
    do { \
        UText *utext_sni_ut_ = (ut); \
        int64_t utext_sni_ix_ = (ix); \
        int64_t utext_sni_off_ = utext_sni_ix_ - utext_sni_ut_->chunkNativeStart; \
        if (utext_sni_off_ >= 0 && \
            utext_sni_off_ < (int64_t)utext_sni_ut_->nativeIndexingLimit && \
            utext_sni_ut_->chunkContents[utext_sni_off_] < 0xdc00) { \
            utext_sni_ut_->chunkOffset = (int32_t)utext_sni_off_; \
        } else { \
            utext_setNativeIndex(utext_sni_ut_, utext_sni_ix_); \
        } \
    } while (0)
773
774
775
776/************************************************************************************
777 *
778 * Functions related to writing or modifying the text.
779 * These will work only with modifiable UTexts. Attempting to
780 * modify a read-only UText will return an error status.
781 *
782 ************************************************************************************/
783
784
803U_STABLE UBool U_EXPORT2
805
806
815U_STABLE UBool U_EXPORT2
817
818
846U_STABLE int32_t U_EXPORT2
848 int64_t nativeStart, int64_t nativeLimit,
849 const UChar *replacementText, int32_t replacementLength,
850 UErrorCode *status);
851
852
853
886U_STABLE void U_EXPORT2
888 int64_t nativeStart, int64_t nativeLimit,
889 int64_t destIndex,
890 UBool move,
891 UErrorCode *status);
892
893
915U_STABLE void U_EXPORT2
917
918
925enum {
959};
960
/** Function type declaration for UText.clone(). */
998typedef UText * U_CALLCONV
999UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
1000
1001
1010typedef int64_t U_CALLCONV
1012
/** Function type declaration for UText.access(). */
1038typedef UBool U_CALLCONV
1039UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
1040
1068typedef int32_t U_CALLCONV
1070 int64_t nativeStart, int64_t nativeLimit,
1071 UChar *dest, int32_t destCapacity,
1072 UErrorCode *status);
1073
1103typedef int32_t U_CALLCONV
1105 int64_t nativeStart, int64_t nativeLimit,
1106 const UChar *replacementText, int32_t replacmentLength,
1107 UErrorCode *status);
1108
1137typedef void U_CALLCONV
1139 int64_t nativeStart, int64_t nativeLimit,
1140 int64_t nativeDest,
1141 UBool move,
1142 UErrorCode *status);
1143
1157typedef int64_t U_CALLCONV
1159
/** Function type declaration for UText.mapIndexToUTF16(). */
1175typedef int32_t U_CALLCONV
1176UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
1177
1178
1196typedef void U_CALLCONV
1198
1199
1326
/* Typedef for struct UTextFuncs, the function dispatch table for UText.
 * The struct definition itself is not part of this listing. */
1330typedef struct UTextFuncs UTextFuncs;
1331
/*
 * UText struct.
 *
 * NOTE(review): this listing shows only some of the members. The access
 * macros and UTEXT_INITIALIZER also use providerProperties, sizeOfStruct,
 * chunkNativeLimit, nativeIndexingLimit, chunkNativeStart, chunkOffset,
 * chunkLength, chunkContents and pFuncs, which appear to have been dropped
 * when the listing was extracted - confirm against the real header before
 * relying on the layout shown here.
 */
1343struct UText {
 /* (private) Magic. */
1356 uint32_t magic;
1357
1358
 /* (private) Flags for managing the allocation and freeing of memory
  * associated with this UText. */
1364 int32_t flags;
1365
1366
1373
1381
1382 /* ------ 16 byte alignment boundary ----------- */
1383
1384
1391
 /* (protected) Size in bytes of the extra space (pExtra). */
1396 int32_t extraSize;
1397
1406
1407 /* ---- 16 byte alignment boundary------ */
1408
1414
1421
1427
1428 /* ---- 16 byte alignment boundary-- */
1429
1430
1438
1444
 /* (protected) Pointer to additional space requested by the text provider
  * when the UText was opened. */
1450 void *pExtra;
1451
 /* (protected) Pointer to string or text-containing object or similar. */
1458 const void *context;
1459
1460 /* --- 16 byte alignment boundary--- */
1461
 /* (protected) p, q, r: pointer fields available for use by the text
  * provider. */
1467 const void *p;
1473 const void *q;
1479 const void *r;
1480
 /* Private pointer field reserved for future use by the UText framework
  * itself. */
1486 void *privP;
1487
1488
1489 /* --- 16 byte alignment boundary--- */
1490
1491
 /* (protected) a, b, c: integer fields reserved for use by the text
  * provider. */
1497 int64_t a;
1498
1504 int32_t b;
1505
1511 int32_t c;
1512
1513 /* ---- 16 byte alignment boundary---- */
1514
1515
 /* privA, privB, privC: private fields reserved for future use by the
  * UText framework itself. */
1521 int64_t privA;
1527 int32_t privB;
1533 int32_t privC;
1534};
1535
1536
/** Common function for use by Text Provider implementations to allocate
 *  and/or initialize a new UText. */
1553U_STABLE UText * U_EXPORT2
1554utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
1555
/*
 * Deliberately NOT wrapped in #ifndef U_HIDE_INTERNAL_API: the
 * UTEXT_INITIALIZER macro expands to UTEXT_MAGIC, so hiding this constant
 * would make UTEXT_INITIALIZER fail to compile for any client that defines
 * U_HIDE_INTERNAL_API.
 */
/**
 * Value that UTEXT_INITIALIZER stores in the magic member of a UText.
 * Internal: client code should not use the constant directly.
 */
enum {
    UTEXT_MAGIC = 0x345ad82c
};
1566
/*
 * Brace initializer for a UText variable.
 * Sets magic to UTEXT_MAGIC and sizeOfStruct to sizeof(UText); every other
 * member is initialized to 0 or NULL. Values are positional, so their order
 * must match the member order of struct UText (the trailing comments name
 * the member each value is meant for).
 * Requires UTEXT_MAGIC to be visible at the point of use.
 */
1574#define UTEXT_INITIALIZER { \
1575 UTEXT_MAGIC, /* magic */ \
1576 0, /* flags */ \
1577 0, /* providerProps */ \
1578 sizeof(UText), /* sizeOfStruct */ \
1579 0, /* chunkNativeLimit */ \
1580 0, /* extraSize */ \
1581 0, /* nativeIndexingLimit */ \
1582 0, /* chunkNativeStart */ \
1583 0, /* chunkOffset */ \
1584 0, /* chunkLength */ \
1585 NULL, /* chunkContents */ \
1586 NULL, /* pFuncs */ \
1587 NULL, /* pExtra */ \
1588 NULL, /* context */ \
1589 NULL, NULL, NULL, /* p, q, r */ \
1590 NULL, /* privP */ \
1591 0, 0, 0, /* a, b, c */ \
1592 0, 0, 0 /* privA,B,C, */ \
1593 }
1594
1595
1597
1598
1599
1600#endif
C++ API: Character Iterator.
"Smart pointer" class, closes a UText via utext_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition platform.h:848
C++ API: Replaceable String.
(public) Function dispatch table for UText.
Definition utext.h:1209
int32_t reserved1
(private) Alignment padding.
Definition utext.h:1231
int32_t reserved2
Definition utext.h:1231
UTextMapOffsetToNative * mapOffsetToNative
(public) Function pointer for UTextMapOffsetToNative.
Definition utext.h:1289
UTextAccess * access
(public) Function pointer for UTextAccess.
Definition utext.h:1257
UTextExtract * extract
(public) Function pointer for UTextExtract.
Definition utext.h:1265
int32_t reserved3
Definition utext.h:1231
UTextClose * spare3
(private) Spare function pointer
Definition utext.h:1323
int32_t tableSize
(public) Function table size, sizeof(UTextFuncs) Intended for use should the table grow to accommodate...
Definition utext.h:1224
UTextNativeLength * nativeLength
(public) function pointer for UTextLength May be expensive to compute!
Definition utext.h:1249
UTextReplace * replace
(public) Function pointer for UTextReplace.
Definition utext.h:1273
UTextClose * spare2
(private) Spare function pointer
Definition utext.h:1317
UTextClose * spare1
(private) Spare function pointer
Definition utext.h:1311
UTextCopy * copy
(public) Function pointer for UTextCopy.
Definition utext.h:1281
UTextMapNativeIndexToUTF16 * mapNativeIndexToUTF16
(public) Function pointer for UTextMapNativeIndexToUTF16.
Definition utext.h:1297
UTextClose * close
(public) Function pointer for UTextClose.
Definition utext.h:1305
UTextClone * clone
(public) Function pointer for UTextClone
Definition utext.h:1240
UText struct.
Definition utext.h:1343
int32_t b
(protected) Integer field reserved for use by the text provider.
Definition utext.h:1504
int32_t chunkOffset
(protected) Current iteration position within the text chunk (UTF-16 buffer).
Definition utext.h:1420
const void * p
(protected) Pointer fields available for use by the text provider.
Definition utext.h:1467
int32_t extraSize
(protected) Size in bytes of the extra space (pExtra).
Definition utext.h:1396
int64_t privA
Private field reserved for future use by the UText framework itself.
Definition utext.h:1521
int32_t nativeIndexingLimit
(protected) The highest chunk offset where native indexing and chunk (UTF-16) indexing correspond.
Definition utext.h:1405
int32_t flags
(private) Flags for managing the allocation and freeing of memory associated with this UText.
Definition utext.h:1364
int32_t sizeOfStruct
(public) sizeOfStruct=sizeof(UText) Allows possible backward compatible extension.
Definition utext.h:1380
void * pExtra
(protected) Pointer to additional space requested by the text provider during the utext_open operatio...
Definition utext.h:1450
int64_t chunkNativeLimit
(protected) Native index of the first character position following the current chunk.
Definition utext.h:1390
const void * r
(protected) Pointer fields available for use by the text provider.
Definition utext.h:1479
int32_t privB
Private field reserved for future use by the UText framework itself.
Definition utext.h:1527
uint32_t magic
(private) Magic.
Definition utext.h:1356
const UTextFuncs * pFuncs
(public) Pointer to Dispatch table for accessing functions for this UText.
Definition utext.h:1443
int32_t chunkLength
(protected) Length of the text chunk (UTF-16 buffer), in UChars.
Definition utext.h:1426
int32_t providerProperties
Text provider properties.
Definition utext.h:1372
const UChar * chunkContents
(protected) pointer to a chunk of text in UTF-16 format.
Definition utext.h:1437
int32_t privC
Private field reserved for future use by the UText framework itself.
Definition utext.h:1533
const void * q
(protected) Pointer fields available for use by the text provider.
Definition utext.h:1473
void * privP
Private field reserved for future use by the UText framework itself.
Definition utext.h:1486
int64_t chunkNativeStart
(protected) Native index of the first character in the text chunk.
Definition utext.h:1413
const void * context
(protected) Pointer to string or text-containing object or similar.
Definition utext.h:1458
int32_t c
(protected) Integer field reserved for use by the text provider.
Definition utext.h:1511
int64_t a
(protected) Integer field reserved for use by the text provider.
Definition utext.h:1497
C API: Unicode Properties.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:332
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition umachine.h:83
int8_t UBool
The ICU boolean type.
Definition umachine.h:234
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition umachine.h:82
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition umachine.h:312
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition umachine.h:109
C++ API: Unicode String.
int32_t UTextReplace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacmentLength, UErrorCode *status)
Function type declaration for UText.replace().
Definition utext.h:1104
UText * utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status)
Common function for use by Text Provider implementations to allocate and/or initialize a new UText st...
UBool utext_isLengthExpensive(const UText *ut)
Return TRUE if calculating the length of the text could be expensive.
void utext_freeze(UText *ut)
UText * utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status)
Open a read-only UText implementation for UTF-8 strings.
UText * UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
Function type declaration for UText.clone().
Definition utext.h:999
int64_t utext_getPreviousNativeIndex(UText *ut)
Get the native index of the character preceding the current position.
UText * utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status)
Clone a UText.
int64_t UTextNativeLength(UText *ut)
Function type declaration for UText.nativeLength().
Definition utext.h:1011
void utext_setNativeIndex(UText *ut, int64_t nativeIndex)
Set the current iteration position to the nearest code point boundary at or preceding the specified i...
UBool utext_equals(const UText *a, const UText *b)
Compare two UText objects for equality.
UBool utext_hasMetaData(const UText *ut)
Test whether there is meta data associated with the text.
UText * utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status)
Open a UText implementation over an ICU CharacterIterator.
int32_t UTextExtract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Function type declaration for UText.extract().
Definition utext.h:1069
void UTextClose(UText *ut)
Function type declaration for UText.utextClose().
Definition utext.h:1197
UText * utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status)
Open a writable UText implementation for an ICU Replaceable object.
UText * utext_close(UText *ut)
Close function for UText instances.
UChar32 utext_previous32From(UText *ut, int64_t nativeIndex)
Set the iteration index, and return the code point preceding the one specified by the initial index.
@ UTEXT_PROVIDER_HAS_META_DATA
There is meta data associated with the text.
Definition utext.h:950
@ UTEXT_PROVIDER_STABLE_CHUNKS
Text chunks remain valid and usable until the text object is modified or deleted, not just until the ...
Definition utext.h:937
@ UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE
It is potentially time consuming for the provider to determine the length of the text.
Definition utext.h:930
@ UTEXT_PROVIDER_OWNS_TEXT
Text provider owns the text storage.
Definition utext.h:958
@ UTEXT_PROVIDER_WRITABLE
The provider supports modifying the text via the replace() and copy() functions.
Definition utext.h:944
UChar32 utext_next32From(UText *ut, int64_t nativeIndex)
Set the iteration index and return the code point at that index.
UBool UTextAccess(UText *ut, int64_t nativeIndex, UBool forward)
Function type declaration for UText.access().
Definition utext.h:1039
UChar32 utext_current32(UText *ut)
Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached...
int32_t utext_replace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacementLength, UErrorCode *status)
Replace a range of the original text with a replacement text.
int64_t UTextMapOffsetToNative(const UText *ut)
Function type declaration for UText.mapOffsetToNative().
Definition utext.h:1158
UChar32 utext_char32At(UText *ut, int64_t nativeIndex)
Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds.
int32_t UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex)
Function type declaration for UText.mapIndexToUTF16().
Definition utext.h:1176
int64_t utext_nativeLength(UText *ut)
Get the length of the text.
UText * utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status)
Open a read-only UText for UChar * string.
void utext_copy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t destIndex, UBool move, UErrorCode *status)
Copy or move a substring from one position to another within the text, while retaining any metadata a...
UText * utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status)
Open a UText for a const UnicodeString.
int64_t utext_getNativeIndex(const UText *ut)
Get the current iterator position, which can range from 0 to the length of the text.
void UTextCopy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t nativeDest, UBool move, UErrorCode *status)
Function type declaration for UText.copy().
Definition utext.h:1138
UChar32 utext_next32(UText *ut)
Get the code point at the current iteration position of the UText, and advance the position to the fi...
UText * utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status)
Open a writable UText for a non-const UnicodeString.
int32_t utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract text from a UText into a UChar buffer.
UBool utext_isWritable(const UText *ut)
Return TRUE if the text can be written (modified) with utext_replace() or utext_copy().
UChar32 utext_previous32(UText *ut)
Move the iterator position to the character (code point) whose index precedes the current position,...
UBool utext_moveIndex32(UText *ut, int32_t delta)
Move the iterator position by delta code points.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition utypes.h:476
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition uversion.h:129