| Index: source/i18n/collationsets.h
|
| diff --git a/source/i18n/collationsets.h b/source/i18n/collationsets.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..4f0a2b6dceac139b63b7c05e9b4ef1ba516f9ef7
|
| --- /dev/null
|
| +++ b/source/i18n/collationsets.h
|
| @@ -0,0 +1,142 @@
|
| +/*
|
| +*******************************************************************************
|
| +* Copyright (C) 2013-2014, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +*******************************************************************************
|
| +* collationsets.h
|
| +*
|
| +* created on: 2013feb09
|
| +* created by: Markus W. Scherer
|
| +*/
|
| +
|
| +#ifndef __COLLATIONSETS_H__
|
| +#define __COLLATIONSETS_H__
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_COLLATION
|
| +
|
| +#include "unicode/uniset.h"
|
| +#include "collation.h"
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +struct CollationData;  // Forward declaration: this header only refers to CollationData through pointers.
|
| +
|
| +/**
|
| + * Finds the set of characters and strings that sort differently in the tailoring
|
| + * from the base data.
|
| + *
|
| + * Every mapping in the tailoring needs to be compared to the base,
|
| + * because some mappings are copied for optimization, and
|
| + * all contractions for a character are copied if any contractions for that character
|
| + * are added, modified or removed.
|
| + *
|
| + * It might be simpler to re-parse the rule string, but:
|
| + * - That would require duplicating some of the from-rules builder code.
|
| + * - That would make the runtime code depend on the builder.
|
| + * - That would only work if we have the rule string, and we allow users to
|
| + * omit the rule string from data files.
|
| + */
|
| +class TailoredSet : public UMemory {
|
| +public:
|
| + TailoredSet(UnicodeSet *t)  // Keeps the pointer t in `tailored`; no destructor is declared, so this class never deletes it.
|
| + : data(NULL), baseData(NULL),
|
| + tailored(t),
|
| + suffix(NULL),
|
| + errorCode(U_ZERO_ERROR) {}
|
| + // Defined outside this header. The errorCode parameter shadows the errorCode data member inside the function.
|
| + void forData(const CollationData *d, UErrorCode &errorCode);
|
| +
|
| + /** Handles the value ce32 for start..end. NOTE(review): an inclusive code point range is assumed from the parameter names -- confirm.
|
| + * @return U_SUCCESS(errorCode) in C++, void in Java
|
| + * @internal only public for access by callback
|
| + */
|
| + UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
|
| +
|
| +private:  // Everything below except setPrefix()/resetPrefix() is only declared here; the definitions are not in this header.
|
| + void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32);  // NOTE(review): presumably tailoring value vs. base value for c -- confirm.
|
| + void comparePrefixes(UChar32 c, const UChar *p, const UChar *q);
|
| + void compareContractions(UChar32 c, const UChar *p, const UChar *q);
|
| + // NOTE(review): the add*() helpers presumably record c, with its prefix/suffix context, in *tailored -- confirm in the implementation.
|
| + void addPrefixes(const CollationData *d, UChar32 c, const UChar *p);
|
| + void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32);
|
| + void addContractions(UChar32 c, const UChar *p);
|
| + void addSuffix(UChar32 c, const UnicodeString &sfx);
|
| + void add(UChar32 c);
|
| +
|
| + /** Prefixes are reversed in the data structure: this copies pfx into unreversedPrefix and reverses that copy in place; pfx itself is not modified. */
|
| + void setPrefix(const UnicodeString &pfx) {
|
| + unreversedPrefix = pfx;
|
| + unreversedPrefix.reverse();
|
| + }
|
| + void resetPrefix() {  // Empties unreversedPrefix (UnicodeString::remove() without arguments removes all characters).
|
| + unreversedPrefix.remove();
|
| + }
|
| + // State. The constructor sets the pointers data, baseData and suffix to NULL and errorCode to U_ZERO_ERROR.
|
| + const CollationData *data;  // NOTE(review): presumably the tailoring data being examined -- confirm.
|
| + const CollationData *baseData;  // NOTE(review): presumably the base data it is compared against -- confirm.
|
| + UnicodeSet *tailored;  // Set supplied to the constructor; NOTE(review): presumably receives the results -- confirm.
|
| + UnicodeString unreversedPrefix;  // Written by setPrefix() and cleared by resetPrefix().
|
| + const UnicodeString *suffix;  // Pointer only; never deleted by this class.
|
| + UErrorCode errorCode;  // Member error state, separate from the UErrorCode reference passed to forData().
|
| +};
|
| +// NOTE(review): judging by the member names, gathers contraction and expansion strings into caller-supplied sets and reports CEs to a sink -- confirm against the implementation.
|
| +class ContractionsAndExpansions : public UMemory {
|
| +public:
|
| + class CESink : public UMemory {  // Abstract callback interface: both handlers are pure virtual.
|
| + public:
|
| + virtual ~CESink();  // Declared only; the definition is not in this header.
|
| + virtual void handleCE(int64_t ce) = 0;  // Receives a single 64-bit value.
|
| + virtual void handleExpansion(const int64_t ces[], int32_t length) = 0;  // Receives an array of 64-bit values and its length.
|
| + };
|
| + // Stores con, exp, s and prefixes as given; data and suffix start as NULL, checkTailored as 0, errorCode as U_ZERO_ERROR. The ces array is not initialized. No destructor is declared, so no pointer is deleted here.
|
| + ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes)
|
| + : data(NULL),
|
| + contractions(con), expansions(exp),
|
| + sink(s),
|
| + addPrefixes(prefixes),
|
| + checkTailored(0),
|
| + suffix(NULL),
|
| + errorCode(U_ZERO_ERROR) {}
|
| + // Entry points, defined outside this header. NOTE(review): forData presumably covers all of d and forCodePoint only c -- confirm.
|
| + void forData(const CollationData *d, UErrorCode &errorCode);
|
| + void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec);
|
| +
|
| + // all following: @internal, only public for access by callback
|
| +
|
| + void handleCE32(UChar32 start, UChar32 end, uint32_t ce32);  // Returns void, unlike TailoredSet::handleCE32(), which returns UBool.
|
| +
|
| + void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32);
|
| + void handleContractions(UChar32 start, UChar32 end, uint32_t ce32);
|
| +
|
| + void addExpansions(UChar32 start, UChar32 end);
|
| + void addStrings(UChar32 start, UChar32 end, UnicodeSet *set);
|
| +
|
| + /** Prefixes are reversed in the data structure: this copies pfx into unreversedPrefix and reverses that copy in place; pfx itself is not modified. */
|
| + void setPrefix(const UnicodeString &pfx) {
|
| + unreversedPrefix = pfx;
|
| + unreversedPrefix.reverse();
|
| + }
|
| + void resetPrefix() {  // Empties unreversedPrefix (UnicodeString::remove() without arguments removes all characters).
|
| + unreversedPrefix.remove();
|
| + }
|
| + // State; these data members are public because they follow the "all following" note above.
|
| + const CollationData *data;  // NULL after construction. NOTE(review): presumably set by forData()/forCodePoint() -- confirm.
|
| + UnicodeSet *contractions;  // Constructor argument con.
|
| + UnicodeSet *expansions;  // Constructor argument exp.
|
| + CESink *sink;  // Constructor argument s.
|
| + UBool addPrefixes;  // Constructor argument prefixes.
|
| + int8_t checkTailored; // -1: collected tailored +1: exclude tailored (the constructor sets it to 0)
|
| + UnicodeSet tailored;  // Held by value and default-constructed.
|
| + UnicodeSet ranges;  // Held by value and default-constructed.
|
| + UnicodeString unreversedPrefix;  // Written by setPrefix() and cleared by resetPrefix().
|
| + const UnicodeString *suffix;  // Pointer only; never deleted by this class.
|
| + int64_t ces[Collation::MAX_EXPANSION_LENGTH];  // Fixed-size buffer; not initialized by the constructor.
|
| + UErrorCode errorCode;  // Member error state, separate from the UErrorCode references passed to forData()/forCodePoint().
|
| +};
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +#endif // !UCONFIG_NO_COLLATION
|
| +#endif // __COLLATIONSETS_H__
|
|
|