| Index: source/i18n/collationdata.h
|
| diff --git a/source/i18n/collationdata.h b/source/i18n/collationdata.h
|
| index f24076b8e6474d7e6737106a2dbb5e326426893d..dd7fcebea46138eccf89b9155afa131893486f40 100644
|
| --- a/source/i18n/collationdata.h
|
| +++ b/source/i18n/collationdata.h
|
| @@ -1,6 +1,6 @@
|
| /*
|
| *******************************************************************************
|
| -* Copyright (C) 2010-2014, International Business Machines
|
| +* Copyright (C) 2010-2015, International Business Machines
|
| * Corporation and others. All Rights Reserved.
|
| *******************************************************************************
|
| * collationdata.h
|
| @@ -16,6 +16,7 @@
|
|
|
| #if !UCONFIG_NO_COLLATION
|
|
|
| +#include "unicode/ucol.h"
|
| #include "unicode/uniset.h"
|
| #include "collation.h"
|
| #include "normalizer2impl.h"
|
| @@ -25,6 +26,8 @@ struct UDataMemory;
|
|
|
| U_NAMESPACE_BEGIN
|
|
|
| +class UVector32;
|
| +
|
| /**
|
| * Collation data container.
|
| * Immutable data created by a CollationDataBuilder, or loaded from a file,
|
| @@ -33,6 +36,20 @@ U_NAMESPACE_BEGIN
|
| * Includes data for the collation base (root/default), aliased if this is not the base.
|
| */
|
| struct U_I18N_API CollationData : public UMemory {
|
| + // Note: The ucadata.icu loader could discover the reserved ranges by setting an array
|
| + // parallel with the ranges, and resetting ranges that are indexed.
|
| + // The reordering builder code could clone the resulting template array.
|
| + enum {
|
| + REORDER_RESERVED_BEFORE_LATIN = UCOL_REORDER_CODE_FIRST + 14,
|
| + REORDER_RESERVED_AFTER_LATIN
|
| + };
|
| +
|
| + enum {
|
| + MAX_NUM_SPECIAL_REORDER_CODES = 8,
|
| + /** C++ only; the data reader checks scriptStartsLength against this maximum. */
|
| + MAX_NUM_SCRIPT_RANGES = 256
|
| + };
|
| +
|
| CollationData(const Normalizer2Impl &nfc)
|
| : trie(NULL),
|
| ce32s(NULL), ces(NULL), contexts(NULL), base(NULL),
|
| @@ -43,7 +60,7 @@ struct U_I18N_API CollationData : public UMemory {
|
| compressibleBytes(NULL),
|
| unsafeBackwardSet(NULL),
|
| fastLatinTable(NULL), fastLatinTableLength(0),
|
| - scripts(NULL), scriptsLength(0),
|
| + numScripts(0), scriptsIndex(NULL), scriptStarts(NULL), scriptStartsLength(0),
|
| rootElements(NULL), rootElementsLength(0) {}
|
|
|
| uint32_t getCE32(UChar32 c) const {
|
| @@ -137,13 +154,17 @@ struct U_I18N_API CollationData : public UMemory {
|
| int32_t dest[], int32_t capacity, UErrorCode &errorCode) const;
|
|
|
| /**
|
| - * Writes the permutation table for the given reordering of scripts and groups,
|
| - * mapping from default-order primary-weight lead bytes to reordered lead bytes.
|
| + * Writes the permutation of primary-weight ranges
|
| + * for the given reordering of scripts and groups.
|
| * The caller checks for illegal arguments and
|
| * takes care of [DEFAULT] and memory allocation.
|
| + *
|
| + * Each list element will be a (limit, offset) pair as described
|
| + * for CollationSettings::reorderRanges.
|
| + * The list will be empty if no ranges are reordered.
|
| */
|
| - void makeReorderTable(const int32_t *reorder, int32_t length,
|
| - uint8_t table[256], UErrorCode &errorCode) const;
|
| + void makeReorderRanges(const int32_t *reorder, int32_t length,
|
| + UVector32 &ranges, UErrorCode &errorCode) const;
|
|
|
| /** @see jamoCE32s */
|
| static const int32_t JAMO_CE32S_LENGTH = 19 + 21 + 27;
|
| @@ -195,22 +216,26 @@ struct U_I18N_API CollationData : public UMemory {
|
| * Data for scripts and reordering groups.
|
| * Uses include building a reordering permutation table and
|
| * providing script boundaries to AlphabeticIndex.
|
| + */
|
| + int32_t numScripts;
|
| + /**
|
| + * The length of scriptsIndex is numScripts+16.
|
| + * It maps from a UScriptCode or a special reorder code to an entry in scriptStarts.
|
| + * 16 special reorder codes (not all used) are mapped starting at numScripts.
|
| + * Up to MAX_NUM_SPECIAL_REORDER_CODES are codes for special groups like space/punct/digit.
|
| + * There are special codes at the end for reorder-reserved primary ranges.
|
| *
|
| - * This data is a sorted list of primary-weight lead byte ranges (reordering groups),
|
| - * each with a list of pairs sorted in base collation order;
|
| - * each pair contains a script/reorder code and the lowest primary weight for that script.
|
| - *
|
| - * Data structure:
|
| - * - Each reordering group is encoded in n+2 16-bit integers.
|
| - * - First integer:
|
| - * Bits 15..8: First byte of the reordering group's range.
|
| - * Bits 7..0: Last byte of the reordering group's range.
|
| - * - Second integer:
|
| - * Length n of the list of script/reordering codes.
|
| - * - Each further integer is a script or reordering code.
|
| + * Multiple scripts may share a range and index, for example Hira & Kana.
|
| + */
|
| + const uint16_t *scriptsIndex;
|
| + /**
|
| + * Start primary weight (top 16 bits only) for a group/script/reserved range
|
| + * indexed by scriptsIndex.
|
| + * The first range (separators & terminators) and the last range (trailing weights)
|
| + * are not reorderable, and no scriptsIndex entry points to them.
|
| */
|
| - const uint16_t *scripts;
|
| - int32_t scriptsLength;
|
| + const uint16_t *scriptStarts;
|
| + int32_t scriptStartsLength;
|
|
|
| /**
|
| * Collation elements in the root collator.
|
| @@ -221,7 +246,12 @@ struct U_I18N_API CollationData : public UMemory {
|
| int32_t rootElementsLength;
|
|
|
| private:
|
| - int32_t findScript(int32_t script) const;
|
| + int32_t getScriptIndex(int32_t script) const;
|
| + void makeReorderRanges(const int32_t *reorder, int32_t length,
|
| + UBool latinMustMove,
|
| + UVector32 &ranges, UErrorCode &errorCode) const;
|
| + int32_t addLowScriptRange(uint8_t table[], int32_t index, int32_t lowStart) const;
|
| + int32_t addHighScriptRange(uint8_t table[], int32_t index, int32_t highLimit) const;
|
| };
|
|
|
| U_NAMESPACE_END
|
|
|