ICU 57.1 57.1
ubrk.h
Go to the documentation of this file.
1/*
2******************************************************************************
3* Copyright (C) 1996-2015, International Business Machines Corporation and others.
4* All Rights Reserved.
5******************************************************************************
6*/
7
8#ifndef UBRK_H
9#define UBRK_H
10
11#include "unicode/utypes.h"
12#include "unicode/uloc.h"
13#include "unicode/utext.h"
15
20#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
21# define UBRK_TYPEDEF_UBREAK_ITERATOR
27#endif
28
29#if !UCONFIG_NO_BREAK_ITERATION
30
31#include "unicode/parseerr.h"
32
97typedef enum UBreakIteratorType {
106
107#ifndef U_HIDE_DEPRECATED_API
117#endif /* U_HIDE_DEPRECATED_API */
118 UBRK_COUNT = 5
120
124#define UBRK_DONE ((int32_t) -1)
125
126
135typedef enum UWordBreak {
159
168typedef enum ULineBreakTag {
179
180
181
190typedef enum USentenceBreakTag {
207
208
225U_STABLE UBreakIterator* U_EXPORT2
227 const char *locale,
228 const UChar *text,
229 int32_t textLength,
230 UErrorCode *status);
231
247U_STABLE UBreakIterator* U_EXPORT2
249 int32_t rulesLength,
250 const UChar *text,
251 int32_t textLength,
252 UParseError *parseErr,
253 UErrorCode *status);
254
273U_STABLE UBreakIterator * U_EXPORT2
275 const UBreakIterator *bi,
276 void *stackBuffer,
277 int32_t *pBufferSize,
278 UErrorCode *status);
279
280#ifndef U_HIDE_DEPRECATED_API
281
286#define U_BRK_SAFECLONE_BUFFERSIZE 1
287
288#endif /* U_HIDE_DEPRECATED_API */
289
296U_STABLE void U_EXPORT2
298
299#if U_SHOW_CPLUSPLUS_API
300
302
313
315
316#endif
317
326U_STABLE void U_EXPORT2
328 const UChar* text,
329 int32_t textLength,
330 UErrorCode* status);
331
332
350U_STABLE void U_EXPORT2
352 UText* text,
353 UErrorCode* status);
354
355
356
365U_STABLE int32_t U_EXPORT2
367
377U_STABLE int32_t U_EXPORT2
379
389U_STABLE int32_t U_EXPORT2
391
399U_STABLE int32_t U_EXPORT2
401
411U_STABLE int32_t U_EXPORT2
413
423U_STABLE int32_t U_EXPORT2
425 int32_t offset);
426
436U_STABLE int32_t U_EXPORT2
438 int32_t offset);
439
449U_STABLE const char* U_EXPORT2
450ubrk_getAvailable(int32_t index);
451
460U_STABLE int32_t U_EXPORT2
462
463
473U_STABLE UBool U_EXPORT2
474ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
475
485U_STABLE int32_t U_EXPORT2
487
505U_STABLE int32_t U_EXPORT2
506ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
507
517U_STABLE const char* U_EXPORT2
519
545U_STABLE void U_EXPORT2
547 UText *text,
548 UErrorCode *status);
549
550#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
551
552#endif
"Smart pointer" class, closes a UBreakIterator via ubrk_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:539
C API: Parse Error Information.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
UText struct.
Definition: utext.h:1343
UBreakIteratorType
The possible types of text boundaries.
Definition: ubrk.h:97
@ UBRK_WORD
Word breaks.
Definition: ubrk.h:101
@ UBRK_TITLE
Title Case breaks. The iterator created using this type locates title boundaries as described for Unic...
Definition: ubrk.h:116
@ UBRK_CHARACTER
Character breaks.
Definition: ubrk.h:99
@ UBRK_LINE
Line breaks.
Definition: ubrk.h:103
@ UBRK_SENTENCE
Sentence breaks.
Definition: ubrk.h:105
int32_t ubrk_next(UBreakIterator *bi)
Advance the iterator to the boundary following the current boundary.
int32_t ubrk_last(UBreakIterator *bi)
Set the iterator position to the index immediately beyond the last character in the text being scanne...
int32_t ubrk_first(UBreakIterator *bi)
Set the iterator position to zero, the start of the text being scanned.
int32_t ubrk_previous(UBreakIterator *bi)
Set the iterator position to the boundary preceding the current boundary.
UBreakIterator * ubrk_open(UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries for a specified locale.
int32_t ubrk_getRuleStatus(UBreakIterator *bi)
Return the status from the break rule that determined the most recently returned break position.
int32_t ubrk_following(UBreakIterator *bi, int32_t offset)
Advance the iterator to the first boundary following the specified offset.
int32_t ubrk_current(const UBreakIterator *bi)
Determine the most recently-returned text boundary.
void ubrk_close(UBreakIterator *bi)
Close a UBreakIterator.
UBreakIterator * ubrk_openRules(const UChar *rules, int32_t rulesLength, const UChar *text, int32_t textLength, UParseError *parseErr, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries using specified breaking rules.
UBool ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
Returns true if the specified position is a boundary position.
const char * ubrk_getAvailable(int32_t index)
Get a locale for which text breaking information is available.
int32_t ubrk_countAvailable(void)
Determine how many locales have text breaking information available.
void ubrk_setText(UBreakIterator *bi, const UChar *text, int32_t textLength, UErrorCode *status)
Sets an existing iterator to point to a new piece of text.
ULineBreakTag
Enum constants for the line break tags returned by getRuleStatus().
Definition: ubrk.h:168
@ UBRK_LINE_SOFT
Tag value for soft line breaks, positions at which a line break is acceptable but not required
Definition: ubrk.h:171
@ UBRK_LINE_HARD
Tag value for a hard, or mandatory line break
Definition: ubrk.h:175
@ UBRK_LINE_HARD_LIMIT
Upper bound for hard line breaks.
Definition: ubrk.h:177
@ UBRK_LINE_SOFT_LIMIT
Upper bound for soft line breaks.
Definition: ubrk.h:173
const char * ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode *status)
Return the locale of the break iterator.
void ubrk_setUText(UBreakIterator *bi, UText *text, UErrorCode *status)
Sets an existing iterator to point to a new piece of text.
void ubrk_refreshUText(UBreakIterator *bi, UText *text, UErrorCode *status)
Set the subject text string upon which the break iterator is operating without changing any other asp...
int32_t ubrk_preceding(UBreakIterator *bi, int32_t offset)
Set the iterator position to the first boundary preceding the specified offset.
int32_t ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
Get the statuses from the break rules that determined the most recently returned break position.
USentenceBreakTag
Enum constants for the sentence break tags returned by getRuleStatus().
Definition: ubrk.h:190
@ UBRK_SENTENCE_TERM_LIMIT
Upper bound for tags for sentences ended by sentence terminators.
Definition: ubrk.h:197
@ UBRK_SENTENCE_SEP
Tag value for sentences that do not contain an ending sentence terminator ('.', '?', '!', etc.)...
Definition: ubrk.h:202
@ UBRK_SENTENCE_TERM
Tag value for sentences ending with a sentence terminator ('.', '?', '!', etc.)...
Definition: ubrk.h:195
@ UBRK_SENTENCE_SEP_LIMIT
Upper bound for tags for sentences ended by a separator.
Definition: ubrk.h:204
UBreakIterator * ubrk_safeClone(const UBreakIterator *bi, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
Thread safe cloning operation.
struct UBreakIterator UBreakIterator
Opaque type representing an ICU Break iterator object.
Definition: ubrk.h:26
UWordBreak
Enum constants for the word break tags returned by getRuleStatus().
Definition: ubrk.h:135
@ UBRK_WORD_IDEO
Tag value for words containing ideographic characters, lower limit.
Definition: ubrk.h:155
@ UBRK_WORD_NUMBER
Tag value for words that appear to be numbers, lower limit.
Definition: ubrk.h:142
@ UBRK_WORD_NONE_LIMIT
Upper bound for tags for uncategorized words.
Definition: ubrk.h:140
@ UBRK_WORD_LETTER_LIMIT
Tag value for words containing letters, upper limit
Definition: ubrk.h:149
@ UBRK_WORD_KANA
Tag value for words containing kana characters, lower limit.
Definition: ubrk.h:151
@ UBRK_WORD_KANA_LIMIT
Tag value for words containing kana characters, upper limit.
Definition: ubrk.h:153
@ UBRK_WORD_IDEO_LIMIT
Tag value for words containing ideographic characters, upper limit.
Definition: ubrk.h:157
@ UBRK_WORD_NONE
Tag value for "words" that do not fit into any of other categories.
Definition: ubrk.h:138
@ UBRK_WORD_NUMBER_LIMIT
Tag value for words that appear to be numbers, upper limit.
Definition: ubrk.h:144
@ UBRK_WORD_LETTER
Tag value for words that contain letters, excluding hiragana, katakana or ideographic characters,...
Definition: ubrk.h:147
C API: Locale.
ULocDataLocaleType
Constants for *_getLocale(). Allow user to select whether she wants information on requested,...
Definition: uloc.h:336
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:312
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:109
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:476
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129