ICU 57.1 57.1
messagepattern.h
Go to the documentation of this file.
1/*
2*******************************************************************************
3* Copyright (C) 2011-2013, International Business Machines
4* Corporation and others. All Rights Reserved.
5*******************************************************************************
6* file name: messagepattern.h
7* encoding: US-ASCII
8* tab size: 8 (not used)
9* indentation:4
10*
11* created on: 2011mar14
12* created by: Markus W. Scherer
13*/
14
15#ifndef __MESSAGEPATTERN_H__
16#define __MESSAGEPATTERN_H__
17
23#include "unicode/utypes.h"
24
25#if !UCONFIG_NO_FORMATTING
26
27#include "unicode/parseerr.h"
28#include "unicode/unistr.h"
29
88};
93
208
261
/**
 * Evaluates to true when argType is UMSGPAT_ARG_TYPE_PLURAL or
 * UMSGPAT_ARG_TYPE_SELECTORDINAL, and to false for any other value.
 * The macro argument is expanded twice, so pass an expression without
 * side effects.
 */
268#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
269 ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
270
271enum {
278
288
/**
 * Sentinel constant: the double value -123456789.
 * NOTE(review): presumably what getNumericValue() reports for a Part that
 * carries no numeric value -- confirm against the implementation file.
 */
295#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
296
298
299class MessagePatternDoubleList;
300class MessagePatternPartsList;
301
359public:
369
380
399 MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
400
407
415
421
440 UParseError *parseError, UErrorCode &errorCode);
441
460 UParseError *parseError, UErrorCode &errorCode);
461
480 UParseError *parseError, UErrorCode &errorCode);
481
500 UParseError *parseError, UErrorCode &errorCode);
501
507 void clear();
508
516 clear();
517 aposMode=mode;
518 }
519
525 UBool operator==(const MessagePattern &other) const;
526
/**
 * Inequality test, defined as the logical negation of operator==.
 * @param other another MessagePattern to compare against
 * @return the negation of operator==(other)
 */
532 inline UBool operator!=(const MessagePattern &other) const {
533 return !operator==(other);
534 }
535
540 int32_t hashCode() const;
541
547 return aposMode;
548 }
549
550 // Java has package-private jdkAposMode() here.
551 // In C++, this is declared in the MessageImpl class.
552
558 return msg;
559 }
560
567 return hasArgNames;
568 }
569
576 return hasArgNumbers;
577 }
578
590 static int32_t validateArgumentName(const UnicodeString &name);
591
603
604 class Part;
605
/**
 * Returns the number of "parts" created by parsing the pattern string.
 * @return the current value of partsLength
 */
612 int32_t countParts() const {
613 return partsLength;
614 }
615
/**
 * Gets the i-th pattern "part".
 * No range check is performed here: i is used directly as an index into
 * the parts array, so callers must keep it within 0..countParts()-1.
 * @param i the index of the Part to fetch
 * @return a reference to parts[i]
 */
622 const Part &getPart(int32_t i) const {
623 return parts[i];
624 }
625
634 return getPart(i).type;
635 }
636
/**
 * Returns the pattern index of the specified pattern "part".
 * Shorthand for reading the index field of getPart(partIndex); like
 * getPart(), it does not validate partIndex.
 * @param partIndex the index of the Part to look up
 * @return the pattern string index stored in that Part
 */
644 int32_t getPatternIndex(int32_t partIndex) const {
645 return getPart(partIndex).index;
646 }
647
/**
 * Returns the substring of the pattern string indicated by the Part.
 * The result is produced by UnicodeString::tempSubString() over
 * msg[part.index, part.index+part.length), i.e. it is a "temporary
 * substring" as defined by that function.
 * @param part a Part of this MessagePattern
 * @return the substring of msg that the Part covers
 */
655 UnicodeString getSubstring(const Part &part) const {
656 return msg.tempSubString(part.index, part.length);
657 }
658
/**
 * Compares the part's substring with the input string s.
 * @param part a Part of this MessagePattern
 * @param s the string to compare with
 * @return true when msg.compare() over the Part's range reports 0 (equal) for s
 */
666 UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
667 return 0==msg.compare(part.index, part.length, s);
668 }
669
676 double getNumericValue(const Part &part) const;
677
684 double getPluralOffset(int32_t pluralStart) const;
685
/**
 * Returns the index of the ARG|MSG_LIMIT part corresponding to the
 * ARG|MSG_START at start.
 * @param start the index of the start Part
 * @return the limitPartIndex stored in that Part, or start itself when the
 *         stored value is smaller than start
 */
694 int32_t getLimitPartIndex(int32_t start) const {
695 int32_t limit=getPart(start).limitPartIndex;
// A stored limit below start is treated as "no separate limit part":
// the start index is returned instead.
696 if(limit<start) {
697 return start;
698 }
699 return limit;
700 }
701
709 class Part : public UMemory {
710 public:
715 Part() {}
716
723 return type;
724 }
725
/**
 * Returns the pattern string index associated with this Part.
 * @return the value of the index field
 */
731 int32_t getIndex() const {
732 return index;
733 }
734
/**
 * Returns the length of the pattern substring associated with this Part.
 * The length field is stored as uint16_t and widened to int32_t on return.
 * @return the value of the length field
 */
741 int32_t getLength() const {
742 return length;
743 }
744
/**
 * Returns the pattern string limit (exclusive-end) index associated with
 * this Part.
 * @return index+length
 */
751 int32_t getLimit() const {
752 return index+length;
753 }
754
/**
 * Returns a value associated with this part.
 * The value field is stored as int16_t and widened to int32_t on return.
 * @return the value of the value field
 */
761 int32_t getValue() const {
762 return value;
763 }
764
772 UMessagePatternPartType type=getType();
774 return (UMessagePatternArgType)value;
775 } else {
777 }
778 }
779
789 }
790
796 UBool operator==(const Part &other) const;
797
/**
 * Inequality test, defined as the logical negation of operator==.
 * @param other another Part to compare against
 * @return the negation of operator==(other)
 */
803 inline UBool operator!=(const Part &other) const {
804 return !operator==(other);
805 }
806
/**
 * Computes a hash code from the type, index, length and value fields,
 * folding them together with successive multiplications by 37.
 * NOTE(review): the arithmetic looks like it is carried out in signed int,
 * so a large index could overflow -- confirm whether pattern lengths are
 * bounded tightly enough for that not to matter.
 * @return the combined hash value
 */
811 int32_t hashCode() const {
812 return ((type*37+index)*37+length)*37+value;
813 }
814
815 private:
816 friend class MessagePattern;
817
818 static const int32_t MAX_LENGTH=0xffff;
819 static const int32_t MAX_VALUE=0x7fff;
820
821 // Some fields are not final because they are modified during pattern parsing.
822 // After pattern parsing, the parts are effectively immutable.
824 int32_t index;
825 uint16_t length;
826 int16_t value;
827 int32_t limitPartIndex;
828 };
829
830private:
831 void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
832
833 void postParse();
834
835 int32_t parseMessage(int32_t index, int32_t msgStartLength,
836 int32_t nestingLevel, UMessagePatternArgType parentType,
837 UParseError *parseError, UErrorCode &errorCode);
838
839 int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
840 UParseError *parseError, UErrorCode &errorCode);
841
842 int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
843
844 int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
845 UParseError *parseError, UErrorCode &errorCode);
846
847 int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
848 UParseError *parseError, UErrorCode &errorCode);
849
858 static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
859
/**
 * Convenience overload: forwards to the static
 * parseArgNumber(const UnicodeString &, int32_t, int32_t) using this
 * object's own pattern string msg.
 * @param start first index of the range passed on to the static overload
 * @param limit second range argument passed on to the static overload
 * @return whatever the static overload returns for msg, start and limit
 */
860 int32_t parseArgNumber(int32_t start, int32_t limit) {
861 return parseArgNumber(msg, start, limit);
862 }
863
872 void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
873 UParseError *parseError, UErrorCode &errorCode);
874
875 // Java has package-private appendReducedApostrophes() here.
876 // In C++, this is declared in the MessageImpl class.
877
878 int32_t skipWhiteSpace(int32_t index);
879
880 int32_t skipIdentifier(int32_t index);
881
886 int32_t skipDouble(int32_t index);
887
888 static UBool isArgTypeChar(UChar32 c);
889
890 UBool isChoice(int32_t index);
891
892 UBool isPlural(int32_t index);
893
894 UBool isSelect(int32_t index);
895
896 UBool isOrdinal(int32_t index);
897
902 UBool inMessageFormatPattern(int32_t nestingLevel);
903
908 UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
909
910 void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
911 int32_t value, UErrorCode &errorCode);
912
913 void addLimitPart(int32_t start,
914 UMessagePatternPartType type, int32_t index, int32_t length,
915 int32_t value, UErrorCode &errorCode);
916
917 void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
918
919 void setParseError(UParseError *parseError, int32_t index);
920
921 UBool init(UErrorCode &errorCode);
922 UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
923
925 UnicodeString msg;
926 // ArrayList<Part> parts=new ArrayList<Part>();
927 MessagePatternPartsList *partsList;
928 Part *parts;
929 int32_t partsLength;
930 // ArrayList<Double> numericValues;
931 MessagePatternDoubleList *numericValuesList;
932 double *numericValues;
933 int32_t numericValuesLength;
934 UBool hasArgNames;
935 UBool hasArgNumbers;
936 UBool needsAutoQuoting;
937};
938
940
941#endif // !UCONFIG_NO_FORMATTING
942
943#endif // __MESSAGEPATTERN_H__
A message pattern "part", representing a pattern parsing event.
UBool operator==(const Part &other) const
int32_t getLimit() const
Returns the pattern string limit (exclusive-end) index associated with this Part.
int32_t hashCode() const
int32_t getValue() const
Returns a value associated with this part.
Part()
Default constructor, do not use.
int32_t getIndex() const
Returns the pattern string index associated with this Part.
UMessagePatternArgType getArgType() const
Returns the argument type if this part is of type ARG_START or ARG_LIMIT, otherwise UMSGPAT_ARG_TYPE_NONE.
int32_t getLength() const
Returns the length of the pattern substring associated with this Part.
UMessagePatternPartType getType() const
Returns the type of this part.
static UBool hasNumericValue(UMessagePatternPartType type)
Indicates whether the Part type has a numeric value.
UBool operator!=(const Part &other) const
Parses and represents ICU MessageFormat patterns.
void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode)
Clears this MessagePattern and sets the UMessagePatternApostropheMode.
MessagePattern(UErrorCode &errorCode)
Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
virtual ~MessagePattern()
Destructor.
const Part & getPart(int32_t i) const
Gets the i-th pattern "part".
MessagePattern & parseChoiceStyle(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a ChoiceFormat pattern string.
MessagePattern & parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a MessageFormat pattern string.
UnicodeString autoQuoteApostropheDeep() const
Returns a version of the parsed pattern string where each ASCII apostrophe is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
UMessagePatternPartType getPartType(int32_t i) const
Returns the UMessagePatternPartType of the i-th pattern "part".
MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Constructs a MessagePattern with default UMessagePatternApostropheMode and parses the MessageFormat pattern string.
UBool hasNumberedArguments() const
Does the parsed pattern have numbered arguments like {2}?
MessagePattern(const MessagePattern &other)
Copy constructor.
UBool hasNamedArguments() const
Does the parsed pattern have named arguments like {first_name}?
int32_t countParts() const
Returns the number of "parts" created by parsing the pattern string.
int32_t getPatternIndex(int32_t partIndex) const
Returns the pattern index of the specified pattern "part".
static int32_t validateArgumentName(const UnicodeString &name)
Validates and parses an argument name or argument number string.
int32_t hashCode() const
double getNumericValue(const Part &part) const
Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
void clear()
Clears this MessagePattern.
const UnicodeString & getPatternString() const
double getPluralOffset(int32_t pluralStart) const
Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
MessagePattern & operator=(const MessagePattern &other)
Assignment operator.
UBool operator==(const MessagePattern &other) const
MessagePattern & parsePluralStyle(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a PluralFormat pattern string.
UnicodeString getSubstring(const Part &part) const
Returns the substring of the pattern string indicated by the Part.
UBool operator!=(const MessagePattern &other) const
MessagePattern & parseSelectStyle(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a SelectFormat pattern string.
int32_t getLimitPartIndex(int32_t start) const
Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode)
Constructs an empty MessagePattern.
UMessagePatternApostropheMode getApostropheMode() const
UBool partSubstringMatches(const Part &part, const UnicodeString &s) const
Compares the part's substring with the input string s.
UMemory is the common ICU base class.
Definition: uobject.h:110
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:221
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:294
UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const
Create a temporary substring for the specified range.
UMessagePatternPartType
MessagePattern::Part type constants.
@ UMSGPAT_PART_TYPE_INSERT_CHAR
Indicates that a syntax character needs to be inserted for auto-quoting.
@ UMSGPAT_PART_TYPE_ARG_TYPE
The argument type.
@ UMSGPAT_PART_TYPE_MSG_START
Start of a message pattern (main or nested).
@ UMSGPAT_PART_TYPE_ARG_SELECTOR
A selector substring in a "complex" argument style.
@ UMSGPAT_PART_TYPE_ARG_LIMIT
End of an argument.
@ UMSGPAT_PART_TYPE_ARG_NUMBER
The argument number, provided by the value.
@ UMSGPAT_PART_TYPE_MSG_LIMIT
End of a message pattern (main or nested).
@ UMSGPAT_PART_TYPE_ARG_DOUBLE
A numeric value, for example the offset or an explicit selector value in a PluralFormat style.
@ UMSGPAT_PART_TYPE_ARG_NAME
The argument name.
@ UMSGPAT_PART_TYPE_ARG_START
Start of an argument.
@ UMSGPAT_PART_TYPE_REPLACE_NUMBER
Indicates a syntactic (non-escaped) # symbol in a plural variant.
@ UMSGPAT_PART_TYPE_ARG_STYLE
The argument style text.
@ UMSGPAT_PART_TYPE_ARG_INT
An integer value, for example the offset or an explicit selector value in a PluralFormat style.
@ UMSGPAT_PART_TYPE_SKIP_SYNTAX
Indicates a substring of the pattern string which is to be skipped when formatting.
UMessagePatternArgType
Argument type constants.
@ UMSGPAT_ARG_TYPE_SELECT
The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
@ UMSGPAT_ARG_TYPE_NONE
The argument has no specified type.
@ UMSGPAT_ARG_TYPE_PLURAL
The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset (e....
@ UMSGPAT_ARG_TYPE_SIMPLE
The argument has a "simple" type which is provided by the ARG_TYPE part.
@ UMSGPAT_ARG_TYPE_SELECTORDINAL
The argument is an ordinal-number PluralFormat with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
@ UMSGPAT_ARG_TYPE_CHOICE
The argument is a ChoiceFormat with one or more ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR,...
@ UMSGPAT_ARG_NAME_NOT_VALID
Return value from MessagePattern.validateArgumentName() for when the string is invalid.
@ UMSGPAT_ARG_NAME_NOT_NUMBER
Return value from MessagePattern.validateArgumentName() for when the string is a valid "pattern identifier" but not a number.
UMessagePatternApostropheMode
Mode for when an apostrophe starts quoted literal text for MessageFormat output.
@ UMSGPAT_APOS_DOUBLE_OPTIONAL
A literal apostrophe is represented by either a single or a double apostrophe pattern character.
@ UMSGPAT_APOS_DOUBLE_REQUIRED
A literal apostrophe must be represented by a double apostrophe pattern character.
C API: Parse Error Information.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:332
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
C++ API: Unicode String.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:476
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:357
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129