| Index: source/i18n/collationruleparser.h
|
| diff --git a/source/i18n/collationruleparser.h b/source/i18n/collationruleparser.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..3c2b22c9dc455d3bb36840c6eea1b479822e8423
|
| --- /dev/null
|
| +++ b/source/i18n/collationruleparser.h
|
| @@ -0,0 +1,195 @@
|
| +/*
|
| +*******************************************************************************
|
| +* Copyright (C) 2013-2014, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +*******************************************************************************
|
| +* collationruleparser.h
|
| +*
|
| +* created on: 2013apr10
|
| +* created by: Markus W. Scherer
|
| +*/
|
| +
|
| +#ifndef __COLLATIONRULEPARSER_H__
|
| +#define __COLLATIONRULEPARSER_H__
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_COLLATION
|
| +
|
| +#include "unicode/ucol.h"
|
| +#include "unicode/uniset.h"
|
| +#include "unicode/unistr.h"
|
| +
|
| +struct UParseError;  // forward declaration, made before U_NAMESPACE_BEGIN; this header only uses UParseError pointers
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +// Forward declarations only; this header does not pull in the full definitions.
|
| +struct CollationData;
|
| +struct CollationSettings;
|
| +struct CollationTailoring;
|
| +
|
| +class Locale;
|
| +class Normalizer2;
|
| +
|
| +/**
|
| + * Parser for collation rule strings.
|
| + * A Sink must be set before parsing; an Importer is optional.
|
| + * Both are held as aliased (non-owned) pointers.
|
| + */
|
| +class U_I18N_API CollationRuleParser : public UMemory {
|
| +public:
|
| +    /**
|
| +     * Special reset positions.
|
| +     * An enumerator's value is added to POS_BASE to form the second character
|
| +     * of the contraction that encodes the position, so the order is significant.
|
| +     */
|
| +    enum Position {
|
| +        FIRST_TERTIARY_IGNORABLE,
|
| +        LAST_TERTIARY_IGNORABLE,
|
| +        FIRST_SECONDARY_IGNORABLE,
|
| +        LAST_SECONDARY_IGNORABLE,
|
| +        FIRST_PRIMARY_IGNORABLE,
|
| +        LAST_PRIMARY_IGNORABLE,
|
| +        FIRST_VARIABLE,
|
| +        LAST_VARIABLE,
|
| +        FIRST_REGULAR,
|
| +        LAST_REGULAR,
|
| +        FIRST_IMPLICIT,
|
| +        LAST_IMPLICIT,
|
| +        FIRST_TRAILING,
|
| +        LAST_TRAILING
|
| +    };
|
| +
|
| +    /**
|
| +     * Lead (first) character of the contractions that encode special reset positions.
|
| +     * U+FFFE cannot be tailored via rule syntax.
|
| +     *
|
| +     * The character that follows it is POS_BASE + Position.
|
| +     */
|
| +    static const UChar POS_LEAD = 0xfffe;
|
| +    /**
|
| +     * Base value for the second character of the contractions
|
| +     * that encode special reset positions.
|
| +     * Braille characters U+28xx are printable and normalization-inert.
|
| +     * @see POS_LEAD
|
| +     */
|
| +    static const UChar POS_BASE = 0x2800;
|
| +
|
| +    /**
|
| +     * Callback interface for resets, relations and set-based options.
|
| +     * addReset() and addRelation() are pure virtual and must be implemented;
|
| +     * suppressContractions() and optimize() are virtual but not pure,
|
| +     * so overriding them is optional.
|
| +     */
|
| +    class U_I18N_API Sink : public UObject {
|
| +    public:
|
| +        virtual ~Sink();
|
| +        /**
|
| +         * Adds a reset.
|
| +         * For &str, strength is UCOL_IDENTICAL.
|
| +         * For &[before n]str with n=1/2/3,
|
| +         * strength is UCOL_PRIMARY/UCOL_SECONDARY/UCOL_TERTIARY.
|
| +         */
|
| +        virtual void addReset(
|
| +                int32_t strength, const UnicodeString &str,
|
| +                const char *&errorReason, UErrorCode &errorCode) = 0;
|
| +        /**
|
| +         * Adds a relation with the given strength and prefix | str / extension.
|
| +         */
|
| +        virtual void addRelation(
|
| +                int32_t strength, const UnicodeString &prefix,
|
| +                const UnicodeString &str, const UnicodeString &extension,
|
| +                const char *&errorReason, UErrorCode &errorCode) = 0;
|
| +
|
| +        /**
|
| +         * Optional override; the base-class implementation is not in this header.
|
| +         * NOTE(review): presumably receives the set from a suppress-contractions
|
| +         * rule option -- confirm against the parser implementation.
|
| +         */
|
| +        virtual void suppressContractions(
|
| +                const UnicodeSet &set, const char *&errorReason,
|
| +                UErrorCode &errorCode);
|
| +
|
| +        /**
|
| +         * Optional override; the base-class implementation is not in this header.
|
| +         * NOTE(review): presumably receives the set from an optimize
|
| +         * rule option -- confirm against the parser implementation.
|
| +         */
|
| +        virtual void optimize(
|
| +                const UnicodeSet &set, const char *&errorReason,
|
| +                UErrorCode &errorCode);
|
| +    };
|
| +
|
| +    /**
|
| +     * Callback interface for obtaining rules by locale ID and collation type.
|
| +     * Without an Importer, [import locale] syntax is not supported.
|
| +     */
|
| +    class U_I18N_API Importer : public UObject {
|
| +    public:
|
| +        virtual ~Importer();
|
| +        /**
|
| +         * NOTE(review): presumably stores the rule string for
|
| +         * localeID/collationType into rules -- confirm with implementers.
|
| +         */
|
| +        virtual void getRules(
|
| +                const char *localeID, const char *collationType,
|
| +                UnicodeString &rules,
|
| +                const char *&errorReason, UErrorCode &errorCode) = 0;
|
| +    };
|
| +
|
| +    /**
|
| +     * Constructor.
|
| +     * A Sink has to be set before parsing.
|
| +     * Setting an Importer is optional; without one,
|
| +     * [import locale] syntax is not supported.
|
| +     */
|
| +    CollationRuleParser(const CollationData *base, UErrorCode &errorCode);
|
| +    ~CollationRuleParser();
|
| +
|
| +    /**
|
| +     * Stores the pointer to a Sink object.
|
| +     * The pointer is aliased: it is copied as is, without cloning
|
| +     * and without taking ownership.
|
| +     */
|
| +    void setSink(Sink *sinkAlias) {
|
| +        this->sink = sinkAlias;
|
| +    }
|
| +
|
| +    /**
|
| +     * Stores the pointer to an Importer object.
|
| +     * The pointer is aliased: it is copied as is, without cloning
|
| +     * and without taking ownership.
|
| +     */
|
| +    void setImporter(Importer *importerAlias) {
|
| +        this->importer = importerAlias;
|
| +    }
|
| +
|
| +    /**
|
| +     * Parses a rule string.
|
| +     * NOTE(review): outSettings and outParseError look like output parameters
|
| +     * for parsed settings and error details -- confirm against the implementation.
|
| +     */
|
| +    void parse(
|
| +            const UnicodeString &ruleString,
|
| +            CollationSettings &outSettings,
|
| +            UParseError *outParseError,
|
| +            UErrorCode &errorCode);
|
| +
|
| +    /** Returns the current error-reason string pointer. */
|
| +    const char *getErrorReason() const {
|
| +        return errorReason;
|
| +    }
|
| +
|
| +    /**
|
| +     * Looks up a script or reorder code by its string representation.
|
| +     * @return the script/reorder code, or
|
| +     *         -1 if the word is not recognized
|
| +     */
|
| +    static int32_t getReorderCode(const char *word);
|
| +
|
| +private:
|
| +    /** Strength bits: UCOL_PRIMARY=0 .. UCOL_IDENTICAL=15 */
|
| +    static const int32_t STRENGTH_MASK = 0xf;
|
| +    static const int32_t STARRED_FLAG = 0x10;
|
| +    static const int32_t OFFSET_SHIFT = 8;
|
| +
|
| +    void parse(const UnicodeString &ruleString, UErrorCode &errorCode);
|
| +    void parseRuleChain(UErrorCode &errorCode);
|
| +    int32_t parseResetAndPosition(UErrorCode &errorCode);
|
| +    int32_t parseRelationOperator(UErrorCode &errorCode);
|
| +    void parseRelationStrings(int32_t strength, int32_t i, UErrorCode &errorCode);
|
| +    void parseStarredCharacters(int32_t strength, int32_t i, UErrorCode &errorCode);
|
| +    int32_t parseTailoringString(int32_t i, UnicodeString &raw, UErrorCode &errorCode);
|
| +    int32_t parseString(int32_t i, UnicodeString &raw, UErrorCode &errorCode);
|
| +
|
| +    /**
|
| +     * Sets str to the contraction U+FFFE followed by (U+2800 + Position).
|
| +     * @return the rule index after the special reset position
|
| +     */
|
| +    int32_t parseSpecialPosition(int32_t i, UnicodeString &str, UErrorCode &errorCode);
|
| +    void parseSetting(UErrorCode &errorCode);
|
| +    void parseReordering(const UnicodeString &raw, UErrorCode &errorCode);
|
| +    static UColAttributeValue getOnOffValue(const UnicodeString &s);
|
| +
|
| +    int32_t parseUnicodeSet(int32_t i, UnicodeSet &set, UErrorCode &errorCode);
|
| +    int32_t readWords(int32_t i, UnicodeString &raw) const;
|
| +    int32_t skipComment(int32_t i) const;
|
| +
|
| +    void setParseError(const char *reason, UErrorCode &errorCode);
|
| +    void setErrorContext();
|
| +
|
| +    /**
|
| +     * Syntax characters are ASCII [:P:] and [:S:]:
|
| +     * [\u0021-\u002F \u003A-\u0040 \u005B-\u0060 \u007B-\u007E]
|
| +     */
|
| +    static UBool isSyntaxChar(UChar32 c);
|
| +    int32_t skipWhiteSpace(int32_t i) const;
|
| +
|
| +    // Reference members; declared in the same order as before (nfd, then nfc).
|
| +    const Normalizer2 &nfd;
|
| +    const Normalizer2 &nfc;
|
| +
|
| +    const UnicodeString *rules;
|
| +    const CollationData *const baseData;
|
| +    CollationSettings *settings;
|
| +    UParseError *parseError;
|
| +    const char *errorReason;
|
| +
|
| +    // Aliased pointers stored by setSink()/setImporter(); not owned.
|
| +    Sink *sink;
|
| +    Importer *importer;
|
| +
|
| +    int32_t ruleIndex;
|
| +};
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +#endif // !UCONFIG_NO_COLLATION
|
| +#endif // __COLLATIONRULEPARSER_H__
|
|
|