00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #ifndef __UCHARSTRIE_H__
00018 #define __UCHARSTRIE_H__
00019
00026 #include "unicode/utypes.h"
00027
00028 #if U_SHOW_CPLUSPLUS_API
00029
00030 #include "unicode/unistr.h"
00031 #include "unicode/uobject.h"
00032 #include "unicode/ustringtrie.h"
00033
00034 U_NAMESPACE_BEGIN
00035
00036 class Appendable;
00037 class UCharsTrieBuilder;
00038 class UVector32;
00039
00053 class U_COMMON_API UCharsTrie : public UMemory {
00054 public:
00069 UCharsTrie(ConstChar16Ptr trieUChars)
00070 : ownedArray_(NULL), uchars_(trieUChars),
00071 pos_(uchars_), remainingMatchLength_(-1) {}
00072
00077 ~UCharsTrie();
00078
00085 UCharsTrie(const UCharsTrie &other)
00086 : ownedArray_(NULL), uchars_(other.uchars_),
00087 pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
00088
00094 UCharsTrie &reset() {
00095 pos_=uchars_;
00096 remainingMatchLength_=-1;
00097 return *this;
00098 }
00099
00108 uint64_t getState64() const {
00109 return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
00110 (uint64_t)(pos_ - uchars_);
00111 }
00112
00127 UCharsTrie &resetToState64(uint64_t state) {
00128 remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2;
00129 pos_ = uchars_ + (state & kState64PosMask);
00130 return *this;
00131 }
00132
00138 class State : public UMemory {
00139 public:
00144 State() { uchars=NULL; }
00145 private:
00146 friend class UCharsTrie;
00147
00148 const char16_t *uchars;
00149 const char16_t *pos;
00150 int32_t remainingMatchLength;
00151 };
00152
00160 const UCharsTrie &saveState(State &state) const {
00161 state.uchars=uchars_;
00162 state.pos=pos_;
00163 state.remainingMatchLength=remainingMatchLength_;
00164 return *this;
00165 }
00166
00177 UCharsTrie &resetToState(const State &state) {
00178 if(uchars_==state.uchars && uchars_!=NULL) {
00179 pos_=state.pos;
00180 remainingMatchLength_=state.remainingMatchLength;
00181 }
00182 return *this;
00183 }
00184
00191 UStringTrieResult current() const;
00192
00200 inline UStringTrieResult first(int32_t uchar) {
00201 remainingMatchLength_=-1;
00202 return nextImpl(uchars_, uchar);
00203 }
00204
00213 UStringTrieResult firstForCodePoint(UChar32 cp);
00214
00221 UStringTrieResult next(int32_t uchar);
00222
00230 UStringTrieResult nextForCodePoint(UChar32 cp);
00231
00247 UStringTrieResult next(ConstChar16Ptr s, int32_t length);
00248
00258 inline int32_t getValue() const {
00259 const char16_t *pos=pos_;
00260 int32_t leadUnit=*pos++;
00261
00262 return leadUnit&kValueIsFinal ?
00263 readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit);
00264 }
00265
00275 inline UBool hasUniqueValue(int32_t &uniqueValue) const {
00276 const char16_t *pos=pos_;
00277
00278 return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, false, uniqueValue);
00279 }
00280
00288 int32_t getNextUChars(Appendable &out) const;
00289
00294 class U_COMMON_API Iterator : public UMemory {
00295 public:
00307 Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
00308
00320 Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
00321
00326 ~Iterator();
00327
00333 Iterator &reset();
00334
00339 UBool hasNext() const;
00340
00355 UBool next(UErrorCode &errorCode);
00356
00361 const UnicodeString &getString() const { return str_; }
00366 int32_t getValue() const { return value_; }
00367
00368 private:
00369 UBool truncateAndStop() {
00370 pos_=NULL;
00371 value_=-1;
00372 return true;
00373 }
00374
00375 const char16_t *branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode);
00376
00377 const char16_t *uchars_;
00378 const char16_t *pos_;
00379 const char16_t *initialPos_;
00380 int32_t remainingMatchLength_;
00381 int32_t initialRemainingMatchLength_;
00382 UBool skipValue_;
00383
00384 UnicodeString str_;
00385 int32_t maxLength_;
00386 int32_t value_;
00387
00388
00389
00390
00391
00392
00393
00394
00395 UVector32 *stack_;
00396 };
00397
00398 private:
00399 friend class UCharsTrieBuilder;
00400
00407 UCharsTrie(char16_t *adoptUChars, const char16_t *trieUChars)
00408 : ownedArray_(adoptUChars), uchars_(trieUChars),
00409 pos_(uchars_), remainingMatchLength_(-1) {}
00410
00411
00412 UCharsTrie &operator=(const UCharsTrie &other);
00413
00414 inline void stop() {
00415 pos_=NULL;
00416 }
00417
00418
00419
00420 static inline int32_t readValue(const char16_t *pos, int32_t leadUnit) {
00421 int32_t value;
00422 if(leadUnit<kMinTwoUnitValueLead) {
00423 value=leadUnit;
00424 } else if(leadUnit<kThreeUnitValueLead) {
00425 value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos;
00426 } else {
00427 value=(pos[0]<<16)|pos[1];
00428 }
00429 return value;
00430 }
00431 static inline const char16_t *skipValue(const char16_t *pos, int32_t leadUnit) {
00432 if(leadUnit>=kMinTwoUnitValueLead) {
00433 if(leadUnit<kThreeUnitValueLead) {
00434 ++pos;
00435 } else {
00436 pos+=2;
00437 }
00438 }
00439 return pos;
00440 }
00441 static inline const char16_t *skipValue(const char16_t *pos) {
00442 int32_t leadUnit=*pos++;
00443 return skipValue(pos, leadUnit&0x7fff);
00444 }
00445
00446 static inline int32_t readNodeValue(const char16_t *pos, int32_t leadUnit) {
00447
00448 int32_t value;
00449 if(leadUnit<kMinTwoUnitNodeValueLead) {
00450 value=(leadUnit>>6)-1;
00451 } else if(leadUnit<kThreeUnitNodeValueLead) {
00452 value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos;
00453 } else {
00454 value=(pos[0]<<16)|pos[1];
00455 }
00456 return value;
00457 }
00458 static inline const char16_t *skipNodeValue(const char16_t *pos, int32_t leadUnit) {
00459
00460 if(leadUnit>=kMinTwoUnitNodeValueLead) {
00461 if(leadUnit<kThreeUnitNodeValueLead) {
00462 ++pos;
00463 } else {
00464 pos+=2;
00465 }
00466 }
00467 return pos;
00468 }
00469
00470 static inline const char16_t *jumpByDelta(const char16_t *pos) {
00471 int32_t delta=*pos++;
00472 if(delta>=kMinTwoUnitDeltaLead) {
00473 if(delta==kThreeUnitDeltaLead) {
00474 delta=(pos[0]<<16)|pos[1];
00475 pos+=2;
00476 } else {
00477 delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++;
00478 }
00479 }
00480 return pos+delta;
00481 }
00482
00483 static const char16_t *skipDelta(const char16_t *pos) {
00484 int32_t delta=*pos++;
00485 if(delta>=kMinTwoUnitDeltaLead) {
00486 if(delta==kThreeUnitDeltaLead) {
00487 pos+=2;
00488 } else {
00489 ++pos;
00490 }
00491 }
00492 return pos;
00493 }
00494
00495 static inline UStringTrieResult valueResult(int32_t node) {
00496 return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15));
00497 }
00498
00499
00500 UStringTrieResult branchNext(const char16_t *pos, int32_t length, int32_t uchar);
00501
00502
00503 UStringTrieResult nextImpl(const char16_t *pos, int32_t uchar);
00504
00505
00506
00507
00508 static const char16_t *findUniqueValueFromBranch(const char16_t *pos, int32_t length,
00509 UBool haveUniqueValue, int32_t &uniqueValue);
00510
00511
00512 static UBool findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue);
00513
00514
00515
00516 static void getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out);
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561 static const int32_t kMaxBranchLinearSubNodeLength=5;
00562
00563
00564 static const int32_t kMinLinearMatch=0x30;
00565 static const int32_t kMaxLinearMatchLength=0x10;
00566
00567
00568
00569
00570 static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;
00571 static const int32_t kNodeTypeMask=kMinValueLead-1;
00572
00573
00574 static const int32_t kValueIsFinal=0x8000;
00575
00576
00577 static const int32_t kMaxOneUnitValue=0x3fff;
00578
00579 static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1;
00580 static const int32_t kThreeUnitValueLead=0x7fff;
00581
00582 static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1;
00583
00584
00585 static const int32_t kMaxOneUnitNodeValue=0xff;
00586 static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6);
00587 static const int32_t kThreeUnitNodeValueLead=0x7fc0;
00588
00589 static const int32_t kMaxTwoUnitNodeValue=
00590 ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1;
00591
00592
00593 static const int32_t kMaxOneUnitDelta=0xfbff;
00594 static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1;
00595 static const int32_t kThreeUnitDeltaLead=0xffff;
00596
00597 static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1;
00598
00599
00600
00601
00602
00603 static constexpr int32_t kState64RemainingShift = 59;
00604 static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1;
00605
00606 char16_t *ownedArray_;
00607
00608
00609 const char16_t *uchars_;
00610
00611
00612
00613
00614 const char16_t *pos_;
00615
00616 int32_t remainingMatchLength_;
00617 };
00618
00619 U_NAMESPACE_END
00620
00621 #endif
00622
00623 #endif // __UCHARSTRIE_H__