Index: source/i18n/collationruleparser.h |
diff --git a/source/i18n/collationruleparser.h b/source/i18n/collationruleparser.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..3c2b22c9dc455d3bb36840c6eea1b479822e8423 |
--- /dev/null |
+++ b/source/i18n/collationruleparser.h |
@@ -0,0 +1,195 @@ |
+/* |
+******************************************************************************* |
+* Copyright (C) 2013-2014, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+******************************************************************************* |
+* collationruleparser.h |
+* |
+* created on: 2013apr10 |
+* created by: Markus W. Scherer |
+*/ |
+ |
+#ifndef __COLLATIONRULEPARSER_H__ |
+#define __COLLATIONRULEPARSER_H__ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_COLLATION |
+ |
+#include "unicode/ucol.h" |
+#include "unicode/uniset.h" |
+#include "unicode/unistr.h" |
+ |
+struct UParseError; |
+ |
+U_NAMESPACE_BEGIN |
+ |
+struct CollationData; |
+struct CollationTailoring; |
+ |
+class Locale; |
+class Normalizer2; |
+ |
+struct CollationSettings; |
+ |
+class U_I18N_API CollationRuleParser : public UMemory {  /* Parser for collation rule strings; see parse(). */ |
+public: |
+    /** Special reset positions; each is encoded as the character POS_BASE + Position. */ |
+    enum Position { |
+        FIRST_TERTIARY_IGNORABLE, |
+        LAST_TERTIARY_IGNORABLE, |
+        FIRST_SECONDARY_IGNORABLE, |
+        LAST_SECONDARY_IGNORABLE, |
+        FIRST_PRIMARY_IGNORABLE, |
+        LAST_PRIMARY_IGNORABLE, |
+        FIRST_VARIABLE, |
+        LAST_VARIABLE, |
+        FIRST_REGULAR, |
+        LAST_REGULAR, |
+        FIRST_IMPLICIT, |
+        LAST_IMPLICIT, |
+        FIRST_TRAILING, |
+        LAST_TRAILING |
+    }; |
+ |
+    /** |
+     * First character of contractions that encode special reset positions. |
+     * U+FFFE cannot be tailored via rule syntax. |
+     * |
+     * The second contraction character is POS_BASE + Position. |
+     */ |
+    static const UChar POS_LEAD = 0xfffe; |
+    /** |
+     * Base for the second character of contractions that encode special reset positions. |
+     * Braille characters U+28xx are printable and normalization-inert. |
+     * @see POS_LEAD |
+     */ |
+    static const UChar POS_BASE = 0x2800; |
+    /** Abstract callback interface for resets, relations and set-valued options; a Sink must be set (setSink()) before parsing. */ |
+    class U_I18N_API Sink : public UObject { |
+    public: |
+        virtual ~Sink(); |
+        /** |
+         * Adds a reset. |
+         * strength=UCOL_IDENTICAL for &str. |
+         * strength=UCOL_PRIMARY/UCOL_SECONDARY/UCOL_TERTIARY for &[before n]str where n=1/2/3. |
+         */ |
+        virtual void addReset(int32_t strength, const UnicodeString &str, |
+                              const char *&errorReason, UErrorCode &errorCode) = 0; |
+        /** |
+         * Adds a relation with strength and prefix | str / extension. |
+         */ |
+        virtual void addRelation(int32_t strength, const UnicodeString &prefix, |
+                                 const UnicodeString &str, const UnicodeString &extension, |
+                                 const char *&errorReason, UErrorCode &errorCode) = 0; |
+        /** Receives a UnicodeSet; not pure virtual, so overriding is optional. NOTE(review): presumably for [suppressContractions ...] - confirm. */ |
+        virtual void suppressContractions(const UnicodeSet &set, const char *&errorReason, |
+                                          UErrorCode &errorCode); |
+        /** Receives a UnicodeSet; not pure virtual, so overriding is optional. NOTE(review): presumably for [optimize ...] - confirm. */ |
+        virtual void optimize(const UnicodeSet &set, const char *&errorReason, |
+                              UErrorCode &errorCode); |
+    }; |
+    /** Abstract interface used for the [import locale] syntax (see the constructor doc); register via setImporter(). */ |
+    class U_I18N_API Importer : public UObject { |
+    public: |
+        virtual ~Importer(); |
+        virtual void getRules( |
+                const char *localeID, const char *collationType, |
+                UnicodeString &rules,  /* non-const reference: presumably the output - confirm */ |
+                const char *&errorReason, UErrorCode &errorCode) = 0; |
+    }; |
+ |
+    /** |
+     * Constructor. |
+     * The Sink must be set before parsing. |
+     * The Importer can be set, otherwise [import locale] syntax is not supported. |
+     */ |
+    CollationRuleParser(const CollationData *base, UErrorCode &errorCode); |
+    ~CollationRuleParser(); |
+ |
+    /** |
+     * Sets the pointer to a Sink object. |
+     * The pointer is aliased: Pointer copy without cloning or taking ownership. |
+     */ |
+    void setSink(Sink *sinkAlias) { |
+        sink = sinkAlias; |
+    } |
+ |
+    /** |
+     * Sets the pointer to an Importer object. |
+     * The pointer is aliased: Pointer copy without cloning or taking ownership. |
+     */ |
+    void setImporter(Importer *importerAlias) { |
+        importer = importerAlias; |
+    } |
+    /** Parses ruleString. outSettings and outParseError are non-const and named as outputs; errorCode is the usual ICU in/out status. */ |
+    void parse(const UnicodeString &ruleString, |
+               CollationSettings &outSettings, |
+               UParseError *outParseError, |
+               UErrorCode &errorCode); |
+    /** Returns the stored errorReason pointer; this header does not show when it is set or whether it may be NULL. */ |
+    const char *getErrorReason() const { return errorReason; } |
+ |
+    /** |
+     * Gets a script or reorder code from its string representation. |
+     * @return the script/reorder code, or |
+     * -1 if not recognized |
+     */ |
+    static int32_t getReorderCode(const char *word); |
+ |
+private: |
+    /** UCOL_PRIMARY=0 .. UCOL_IDENTICAL=15 */ |
+    static const int32_t STRENGTH_MASK = 0xf; |
+    static const int32_t STARRED_FLAG = 0x10;  /* NOTE(review): presumably marks starred relations (cf. parseStarredCharacters) - confirm */ |
+    static const int32_t OFFSET_SHIFT = 8;  /* NOTE(review): presumably shifts an offset above the strength/flag bits - confirm */ |
+    /* Parsing helpers. NOTE(review): int32_t i / int32_t returns look like indexes into *rules (cf. parseSpecialPosition) - confirm. */ |
+    void parse(const UnicodeString &ruleString, UErrorCode &errorCode);  /* private overload without settings/parse-error outputs */ |
+    void parseRuleChain(UErrorCode &errorCode); |
+    int32_t parseResetAndPosition(UErrorCode &errorCode); |
+    int32_t parseRelationOperator(UErrorCode &errorCode); |
+    void parseRelationStrings(int32_t strength, int32_t i, UErrorCode &errorCode); |
+    void parseStarredCharacters(int32_t strength, int32_t i, UErrorCode &errorCode); |
+    int32_t parseTailoringString(int32_t i, UnicodeString &raw, UErrorCode &errorCode); |
+    int32_t parseString(int32_t i, UnicodeString &raw, UErrorCode &errorCode); |
+ |
+    /** |
+     * Sets str to a contraction of U+FFFE and (U+2800 + Position). |
+     * @return rule index after the special reset position |
+     */ |
+    int32_t parseSpecialPosition(int32_t i, UnicodeString &str, UErrorCode &errorCode); |
+    void parseSetting(UErrorCode &errorCode); |
+    void parseReordering(const UnicodeString &raw, UErrorCode &errorCode); |
+    static UColAttributeValue getOnOffValue(const UnicodeString &s); |
+ |
+    int32_t parseUnicodeSet(int32_t i, UnicodeSet &set, UErrorCode &errorCode); |
+    int32_t readWords(int32_t i, UnicodeString &raw) const; |
+    int32_t skipComment(int32_t i) const; |
+ |
+    void setParseError(const char *reason, UErrorCode &errorCode); |
+    void setErrorContext(); |
+ |
+    /** |
+     * ASCII [:P:] and [:S:]: |
+     * [\u0021-\u002F \u003A-\u0040 \u005B-\u0060 \u007B-\u007E] |
+     */ |
+    static UBool isSyntaxChar(UChar32 c); |
+    int32_t skipWhiteSpace(int32_t i) const; |
+ |
+    const Normalizer2 &nfd, &nfc;  /* reference members: must be bound by the constructor; they also disable copy assignment */ |
+ |
+    const UnicodeString *rules;  /* NOTE(review): presumably the string currently being parsed - confirm */ |
+    const CollationData *const baseData;  /* const pointer: cannot be re-pointed after construction */ |
+    CollationSettings *settings; |
+    UParseError *parseError; |
+    const char *errorReason;  /* value returned by getErrorReason() */ |
+ |
+    Sink *sink;  /* aliased, not owned (see setSink()) */ |
+    Importer *importer;  /* aliased, not owned (see setImporter()) */ |
+ |
+    int32_t ruleIndex;  /* NOTE(review): presumably the current position in *rules - confirm */ |
+}; |
+ |
+U_NAMESPACE_END |
+ |
+#endif // !UCONFIG_NO_COLLATION |
+#endif // __COLLATIONRULEPARSER_H__ |