Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(71)

Unified Diff: source/i18n/collationdata.h

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/i18n/collationcompare.cpp ('k') | source/i18n/collationdata.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/i18n/collationdata.h
diff --git a/source/i18n/collationdata.h b/source/i18n/collationdata.h
index f24076b8e6474d7e6737106a2dbb5e326426893d..dd7fcebea46138eccf89b9155afa131893486f40 100644
--- a/source/i18n/collationdata.h
+++ b/source/i18n/collationdata.h
@@ -1,6 +1,6 @@
/*
*******************************************************************************
-* Copyright (C) 2010-2014, International Business Machines
+* Copyright (C) 2010-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* collationdata.h
@@ -16,6 +16,7 @@
#if !UCONFIG_NO_COLLATION
+#include "unicode/ucol.h"
#include "unicode/uniset.h"
#include "collation.h"
#include "normalizer2impl.h"
@@ -25,6 +26,8 @@ struct UDataMemory;
U_NAMESPACE_BEGIN
+class UVector32;
+
/**
* Collation data container.
* Immutable data created by a CollationDataBuilder, or loaded from a file,
@@ -33,6 +36,20 @@ U_NAMESPACE_BEGIN
* Includes data for the collation base (root/default), aliased if this is not the base.
*/
struct U_I18N_API CollationData : public UMemory {
+ // Note: The ucadata.icu loader could discover the reserved ranges by setting an array
+ // parallel with the ranges, and resetting ranges that are indexed.
+ // The reordering builder code could clone the resulting template array.
+ enum {
+ REORDER_RESERVED_BEFORE_LATIN = UCOL_REORDER_CODE_FIRST + 14,
+ REORDER_RESERVED_AFTER_LATIN
+ };
+
+ enum {
+ MAX_NUM_SPECIAL_REORDER_CODES = 8,
+ /** C++ only, data reader check scriptStartsLength. */
+ MAX_NUM_SCRIPT_RANGES = 256
+ };
+
CollationData(const Normalizer2Impl &nfc)
: trie(NULL),
ce32s(NULL), ces(NULL), contexts(NULL), base(NULL),
@@ -43,7 +60,7 @@ struct U_I18N_API CollationData : public UMemory {
compressibleBytes(NULL),
unsafeBackwardSet(NULL),
fastLatinTable(NULL), fastLatinTableLength(0),
- scripts(NULL), scriptsLength(0),
+ numScripts(0), scriptsIndex(NULL), scriptStarts(NULL), scriptStartsLength(0),
rootElements(NULL), rootElementsLength(0) {}
uint32_t getCE32(UChar32 c) const {
@@ -137,13 +154,17 @@ struct U_I18N_API CollationData : public UMemory {
int32_t dest[], int32_t capacity, UErrorCode &errorCode) const;
/**
- * Writes the permutation table for the given reordering of scripts and groups,
- * mapping from default-order primary-weight lead bytes to reordered lead bytes.
+ * Writes the permutation of primary-weight ranges
+ * for the given reordering of scripts and groups.
* The caller checks for illegal arguments and
* takes care of [DEFAULT] and memory allocation.
+ *
+ * Each list element will be a (limit, offset) pair as described
+ * for the CollationSettings::reorderRanges.
+ * The list will be empty if no ranges are reordered.
*/
- void makeReorderTable(const int32_t *reorder, int32_t length,
- uint8_t table[256], UErrorCode &errorCode) const;
+ void makeReorderRanges(const int32_t *reorder, int32_t length,
+ UVector32 &ranges, UErrorCode &errorCode) const;
/** @see jamoCE32s */
static const int32_t JAMO_CE32S_LENGTH = 19 + 21 + 27;
@@ -195,22 +216,26 @@ struct U_I18N_API CollationData : public UMemory {
* Data for scripts and reordering groups.
* Uses include building a reordering permutation table and
* providing script boundaries to AlphabeticIndex.
+ */
+ int32_t numScripts;
+ /**
+ * The length of scriptsIndex is numScripts+16.
+ * It maps from a UScriptCode or a special reorder code to an entry in scriptStarts.
+ * 16 special reorder codes (not all used) are mapped starting at numScripts.
+ * Up to MAX_NUM_SPECIAL_REORDER_CODES are codes for special groups like space/punct/digit.
+ * There are special codes at the end for reorder-reserved primary ranges.
*
- * This data is a sorted list of primary-weight lead byte ranges (reordering groups),
- * each with a list of pairs sorted in base collation order;
- * each pair contains a script/reorder code and the lowest primary weight for that script.
- *
- * Data structure:
- * - Each reordering group is encoded in n+2 16-bit integers.
- * - First integer:
- * Bits 15..8: First byte of the reordering group's range.
- * Bits 7..0: Last byte of the reordering group's range.
- * - Second integer:
- * Length n of the list of script/reordering codes.
- * - Each further integer is a script or reordering code.
+ * Multiple scripts may share a range and index, for example Hira & Kana.
+ */
+ const uint16_t *scriptsIndex;
+ /**
+ * Start primary weight (top 16 bits only) for a group/script/reserved range
+ * indexed by scriptsIndex.
+ * The first range (separators & terminators) and the last range (trailing weights)
+ * are not reorderable, and no scriptsIndex entry points to them.
*/
- const uint16_t *scripts;
- int32_t scriptsLength;
+ const uint16_t *scriptStarts;
+ int32_t scriptStartsLength;
/**
* Collation elements in the root collator.
@@ -221,7 +246,12 @@ struct U_I18N_API CollationData : public UMemory {
int32_t rootElementsLength;
private:
- int32_t findScript(int32_t script) const;
+ int32_t getScriptIndex(int32_t script) const;
+ void makeReorderRanges(const int32_t *reorder, int32_t length,
+ UBool latinMustMove,
+ UVector32 &ranges, UErrorCode &errorCode) const;
+ int32_t addLowScriptRange(uint8_t table[], int32_t index, int32_t lowStart) const;
+ int32_t addHighScriptRange(uint8_t table[], int32_t index, int32_t highLimit) const;
};
U_NAMESPACE_END
« no previous file with comments | « source/i18n/collationcompare.cpp ('k') | source/i18n/collationdata.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698