Index: source/i18n/collationkeys.h |
diff --git a/source/i18n/collationkeys.h b/source/i18n/collationkeys.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..d1cc76f0283ac04fd2e23c87bba9bcef5753b2ee |
--- /dev/null |
+++ b/source/i18n/collationkeys.h |
@@ -0,0 +1,167 @@ |
+/* |
+******************************************************************************* |
+* Copyright (C) 2012-2014, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+******************************************************************************* |
+* collationkeys.h |
+* |
+* created on: 2012sep02 |
+* created by: Markus W. Scherer |
+*/ |
+ |
+#ifndef __COLLATIONKEYS_H__ |
+#define __COLLATIONKEYS_H__ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_COLLATION |
+ |
+#include "unicode/bytestream.h" |
+#include "unicode/ucol.h" |
+#include "charstr.h" |
+#include "collation.h" |
+ |
+U_NAMESPACE_BEGIN |
+ |
+class CollationIterator; |
+struct CollationDataReader; |
+struct CollationSettings; |
+ |
+class SortKeyByteSink : public ByteSink { |
+public: |
+    /** Wraps the caller-provided buffer dest with destCapacity; starts with nothing appended or ignored. */ |
+    SortKeyByteSink(char *dest, int32_t destCapacity) |
+            : buffer_(dest), capacity_(destCapacity), appended_(0), ignore_(0) {} |
+    virtual ~SortKeyByteSink(); |
+ |
+    /** Sets how many of the next appended bytes are dropped; Append(uint32_t) neither stores nor counts them. */ |
+    void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; } |
+ |
+    virtual void Append(const char *bytes, int32_t n); |
+    /** Appends one byte, unless it is to be ignored. The byte is counted even when it cannot be stored. */ |
+    void Append(uint32_t b) { |
+        if (ignore_ > 0) { |
+            --ignore_; |
+            return; |
+        } |
+        // Store the byte only if it fits or Resize(1, appended_) reports success. |
+        if (appended_ < capacity_ || Resize(1, appended_)) { |
+            buffer_[appended_] = static_cast<char>(b); |
+        } |
+        ++appended_; |
+    } |
+    virtual char *GetAppendBuffer(int32_t min_capacity, int32_t desired_capacity_hint, |
+                                  char *scratch, int32_t scratch_capacity, |
+                                  int32_t *result_capacity); |
+    /** @return the number of bytes appended so far, which may exceed the buffer capacity */ |
+    int32_t NumberOfBytesAppended() const { return appended_; } |
+ |
+    /** @return the number of bytes, ignored ones included, that can be appended without reallocation */ |
+    int32_t GetRemainingCapacity() const { |
+        // At most one of ignore_ and appended_ is expected to be nonzero. |
+        const int32_t capacityWithIgnored = ignore_ + capacity_; |
+        return capacityWithIgnored - appended_; |
+    } |
+ |
+    /** @return TRUE if more bytes have been appended than the buffer can hold */ |
+    UBool Overflowed() const { return capacity_ < appended_; } |
+    /** @return FALSE after a memory allocation failure */ |
+    UBool IsOk() const { return buffer_ != NULL; } |
+ |
+protected: |
+    virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0; |
+    virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0; |
+ |
+    /** Puts the sink into the failed state that IsOk() reports: no buffer and no capacity. */ |
+    void SetNotOk() { capacity_ = 0; buffer_ = NULL; } |
+ |
+    char *buffer_;      // destination bytes; NULL once SetNotOk() has been called |
+    int32_t capacity_;  // number of bytes that buffer_ can hold |
+    int32_t appended_;  // bytes appended so far, counted even beyond capacity_ |
+    int32_t ignore_;    // bytes that Append(uint32_t) still has to drop |
+ |
+private: |
+    SortKeyByteSink(const SortKeyByteSink &);  // not implemented: no copy construction |
+    SortKeyByteSink &operator=(const SortKeyByteSink &);  // not implemented: no assignment |
+}; |
+ |
+class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ { |
+public: |
+    class LevelCallback : public UMemory { |
+    public: |
+        virtual ~LevelCallback(); |
+        /** |
+         * Consulted by writeSortKeyUpToQuaternary(), which stops writing levels on a FALSE result. |
+         * @param level The next level about to be written to the ByteSink. |
+         * @return TRUE if the level is to be written (the base class implementation always returns TRUE) |
+         */ |
+        virtual UBool needToWrite(Collation::Level level); |
+    }; |
+ |
+    /** |
+     * Writes the sort key bytes for minLevel up to the iterator data's strength into sink. |
+     * Optionally writes the case level. |
+     * Stops writing levels when callback.needToWrite(level) returns FALSE. |
+     * Separates levels with the LEVEL_SEPARATOR_BYTE but does not write a TERMINATOR_BYTE. |
+     * NOTE(review): compressibleBytes, preflight and errorCode semantics are not visible here; confirm in the .cpp. |
+     */ |
+    static void writeSortKeyUpToQuaternary(CollationIterator &iter, |
+                                           const UBool *compressibleBytes, |
+                                           const CollationSettings &settings, |
+                                           SortKeyByteSink &sink, |
+                                           Collation::Level minLevel, LevelCallback &callback, |
+                                           UBool preflight, UErrorCode &errorCode); |
+private: |
+    friend struct CollationDataReader; |
+ |
+    CollationKeys();  // private: no instantiation (all methods are static) |
+ |
+    // Secondary level: Compress up to 33 (0x21) common weights as bytes 05..25 or 25..45 (LOW..MIDDLE or MIDDLE..HIGH). |
+    static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE; |
+    static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20; |
+    static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40; |
+    static const int32_t SEC_COMMON_MAX_COUNT = 0x21; |
+ |
+    // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13 (LOW..MIDDLE or MIDDLE..HIGH). |
+    static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1; |
+    static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7; |
+    static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13; |
+    static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7; |
+ |
+    // Case level, upperFirst: Compress up to 13 common weights as 3..15 (LOW..HIGH; no MIDDLE constant). |
+    static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3; |
+    static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15; |
+    static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13; |
+ |
+    // Tertiary level only (no case): Compress up to 97 (0x61) common weights as 05..65 or 65..C5. |
+    static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE; |
+    static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60; |
+    static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0; |
+    static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61; |
+ |
+    // Tertiary with case, lowerFirst: Compress up to 33 (0x21) common weights as 05..25 or 25..45. |
+    static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE; |
+    static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20; |
+    static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40; |
+    static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21; |
+ |
+    // Tertiary with case, upperFirst: Compress up to 33 (0x21) common weights as 85..A5 or A5..C5. |
+    static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80; |
+    static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20; |
+    static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40; |
+    static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21; |
+ |
+    // Quaternary level: Compress up to 113 (0x71) common weights as 1C..8C or 8C..FC. |
+    static const uint32_t QUAT_COMMON_LOW = 0x1c; |
+    static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70; |
+    static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0; |
+    static const int32_t QUAT_COMMON_MAX_COUNT = 0x71; |
+    // Primary weights shifted to quaternary level must be encoded with |
+    // a lead byte below the common-weight compression range. |
+    static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1;  // 0x1b |
+}; |
+ |
+U_NAMESPACE_END |
+ |
+#endif // !UCONFIG_NO_COLLATION |
+#endif // __COLLATIONKEYS_H__ |