| Index: source/i18n/collationkeys.h
|
| diff --git a/source/i18n/collationkeys.h b/source/i18n/collationkeys.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..d1cc76f0283ac04fd2e23c87bba9bcef5753b2ee
|
| --- /dev/null
|
| +++ b/source/i18n/collationkeys.h
|
| @@ -0,0 +1,167 @@
|
| +/*
|
| +*******************************************************************************
|
| +* Copyright (C) 2012-2014, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +*******************************************************************************
|
| +* collationkeys.h
|
| +*
|
| +* created on: 2012sep02
|
| +* created by: Markus W. Scherer
|
| +*/
|
| +
|
| +#ifndef __COLLATIONKEYS_H__
|
| +#define __COLLATIONKEYS_H__
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_COLLATION
|
| +
|
| +#include "unicode/bytestream.h"
|
| +#include "unicode/ucol.h"
|
| +#include "charstr.h"
|
| +#include "collation.h"
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +class CollationIterator;  // forward declaration: only passed by reference in this header
|
| +struct CollationDataReader;  // forward declaration: named as a friend of CollationKeys
|
| +struct CollationSettings;  // forward declaration: only passed by const reference in this header
|
| +
|
| +class SortKeyByteSink : public ByteSink {  // abstract ByteSink over a caller-supplied buffer; counts appended bytes even when they do not fit
|
| +public:
|
| + SortKeyByteSink(char *dest, int32_t destCapacity)
|
| + : buffer_(dest), capacity_(destCapacity),
|
| + appended_(0), ignore_(0) {}  // starts with nothing appended and nothing to ignore
|
| + virtual ~SortKeyByteSink();
|
| +
|
| + void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; }  // Append(uint32_t) discards one byte per call while ignore_ > 0
|
| +
|
| + virtual void Append(const char *bytes, int32_t n);
|
| + void Append(uint32_t b) {  // appends the single byte (char)b
|
| + if (ignore_ > 0) {
|
| + --ignore_;  // consume one ignored byte: nothing is stored and appended_ is not advanced
|
| + } else {
|
| + if (appended_ < capacity_ || Resize(1, appended_)) {  // store only if there is room or Resize() returns TRUE
|
| + buffer_[appended_] = (char)b;
|
| + }
|
| + ++appended_;  // counted even when the byte was not stored, so Overflowed() can report it
|
| + }
|
| + }
|
| + virtual char *GetAppendBuffer(int32_t min_capacity,
|
| + int32_t desired_capacity_hint,
|
| + char *scratch, int32_t scratch_capacity,
|
| + int32_t *result_capacity);
|
| + int32_t NumberOfBytesAppended() const { return appended_; }  // may exceed capacity_, see Overflowed()
|
| +
|
| + /**
|
| + * @return how many bytes can be appended (including ignored ones)
|
| + * without reallocation
|
| + */
|
| + int32_t GetRemainingCapacity() const {
|
| + // Either ignore_ or appended_ should be 0.
|
| + return ignore_ + capacity_ - appended_;
|
| + }
|
| +
|
| + UBool Overflowed() const { return appended_ > capacity_; }  // TRUE once more bytes were appended than capacity_
|
| + /** @return FALSE if memory allocation failed, i.e. buffer_ is NULL (see SetNotOk()) */
|
| + UBool IsOk() const { return buffer_ != NULL; }
|
| +
|
| +protected:
|
| + virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;  // NOTE(review): presumably used by Append(bytes, n) when the bytes do not fit -- confirm in the .cpp
|
| + virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;  // Append(uint32_t) calls Resize(1, appended_) when full and writes buffer_[length] if this returns TRUE
|
| +
|
| + void SetNotOk() {  // marks the sink as failed: IsOk() becomes FALSE and capacity_ drops to 0
|
| + buffer_ = NULL;
|
| + capacity_ = 0;
|
| + }
|
| +
|
| + char *buffer_;  // destination buffer; NULL after SetNotOk()
|
| + int32_t capacity_;  // Append(uint32_t) stores directly only while appended_ < capacity_
|
| + int32_t appended_;  // bytes appended so far, including ones that did not fit
|
| + int32_t ignore_;  // bytes still to be discarded by Append(uint32_t)
|
| +
|
| +private:
|
| + SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
|
| + SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
|
| +};
|
| +
|
| +class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ {
|
| +public:
|
| + class LevelCallback : public UMemory {  // lets the caller decide, level by level, whether that level is written
|
| + public:
|
| + virtual ~LevelCallback();
|
| + /**
|
| + * @param level The next level about to be written to the ByteSink.
|
| + * @return TRUE if the level is to be written
|
| + * (the base class implementation always returns TRUE)
|
| + */
|
| + virtual UBool needToWrite(Collation::Level level);
|
| + };
|
| +
|
| + /**
|
| + * Writes the sort key bytes for minLevel up to the iterator data's strength.
|
| + * Optionally writes the case level.
|
| + * Stops writing levels when callback.needToWrite(level) returns FALSE.
|
| + * Separates levels with the LEVEL_SEPARATOR_BYTE
|
| + * but does not write a TERMINATOR_BYTE.
|
| + */
|
| + static void writeSortKeyUpToQuaternary(CollationIterator &iter,
|
| + const UBool *compressibleBytes,  // NOTE(review): not described above; presumably a per-lead-byte flag table -- confirm
|
| + const CollationSettings &settings,
|
| + SortKeyByteSink &sink,
|
| + Collation::Level minLevel, LevelCallback &callback,
|
| + UBool preflight, UErrorCode &errorCode);  // NOTE(review): preflight semantics are not documented here -- confirm in the .cpp
|
| +private:
|
| + friend struct CollationDataReader;  // may access the private constants below
|
| +
|
| + CollationKeys(); // no instantiation
|
| +
|
| + // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
|
| + static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE;
|
| + static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
|
| + static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40;
|
| + static const int32_t SEC_COMMON_MAX_COUNT = 0x21;  // 33
|
| +
|
| + // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
|
| + static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1;
|
| + static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
|
| + static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13;
|
| + static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
|
| +
|
| + // Case level, upperFirst: Compress up to 13 common weights as 3..15.
|
| + static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3;
|
| + static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15;
|
| + static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
|
| +
|
| + // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
|
| + static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE;
|
| + static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
|
| + static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
|
| + static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61;  // 97
|
| +
|
| + // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
|
| + static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE;
|
| + static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
|
| + static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
|
| + static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;  // 33
|
| +
|
| + // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
|
| + static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80;
|
| + static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
|
| + static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
|
| + static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;  // 33
|
| +
|
| + // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
|
| + static const uint32_t QUAT_COMMON_LOW = 0x1c;
|
| + static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;  // 0x8c
|
| + static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;  // 0xfc
|
| + static const int32_t QUAT_COMMON_MAX_COUNT = 0x71;  // 113
|
| + // Primary weights shifted to quaternary level must be encoded with
|
| + // a lead byte below the common-weight compression range.
|
| + static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b
|
| +};
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +#endif // !UCONFIG_NO_COLLATION
|
| +#endif // __COLLATIONKEYS_H__
|
|
|