Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(450)

Side by Side Diff: source/i18n/collationdata.h

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/collationcompare.cpp ('k') | source/i18n/collationdata.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * Copyright (C) 2010-2014, International Business Machines 3 * Copyright (C) 2010-2015, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************* 5 *******************************************************************************
6 * collationdata.h 6 * collationdata.h
7 * 7 *
8 * created on: 2010oct27 8 * created on: 2010oct27
9 * created by: Markus W. Scherer 9 * created by: Markus W. Scherer
10 */ 10 */
11 11
12 #ifndef __COLLATIONDATA_H__ 12 #ifndef __COLLATIONDATA_H__
13 #define __COLLATIONDATA_H__ 13 #define __COLLATIONDATA_H__
14 14
15 #include "unicode/utypes.h" 15 #include "unicode/utypes.h"
16 16
17 #if !UCONFIG_NO_COLLATION 17 #if !UCONFIG_NO_COLLATION
18 18
19 #include "unicode/ucol.h"
19 #include "unicode/uniset.h" 20 #include "unicode/uniset.h"
20 #include "collation.h" 21 #include "collation.h"
21 #include "normalizer2impl.h" 22 #include "normalizer2impl.h"
22 #include "utrie2.h" 23 #include "utrie2.h"
23 24
24 struct UDataMemory; 25 struct UDataMemory;
25 26
26 U_NAMESPACE_BEGIN 27 U_NAMESPACE_BEGIN
27 28
29 class UVector32;
30
28 /** 31 /**
29 * Collation data container. 32 * Collation data container.
30 * Immutable data created by a CollationDataBuilder, or loaded from a file, 33 * Immutable data created by a CollationDataBuilder, or loaded from a file,
31 * or deserialized from API-provided binary data. 34 * or deserialized from API-provided binary data.
32 * 35 *
33 * Includes data for the collation base (root/default), aliased if this is not the base. 36 * Includes data for the collation base (root/default), aliased if this is not the base.
34 */ 37 */
35 struct U_I18N_API CollationData : public UMemory { 38 struct U_I18N_API CollationData : public UMemory {
39 // Note: The ucadata.icu loader could discover the reserved ranges by setting an array
40 // parallel with the ranges, and resetting ranges that are indexed.
41 // The reordering builder code could clone the resulting template array.
42 enum {
43 REORDER_RESERVED_BEFORE_LATIN = UCOL_REORDER_CODE_FIRST + 14,
44 REORDER_RESERVED_AFTER_LATIN
45 };
46
47 enum {
48 MAX_NUM_SPECIAL_REORDER_CODES = 8,
49 /** C++ only, data reader check scriptStartsLength. */
50 MAX_NUM_SCRIPT_RANGES = 256
51 };
52
36 CollationData(const Normalizer2Impl &nfc) 53 CollationData(const Normalizer2Impl &nfc)
37 : trie(NULL), 54 : trie(NULL),
38 ce32s(NULL), ces(NULL), contexts(NULL), base(NULL), 55 ce32s(NULL), ces(NULL), contexts(NULL), base(NULL),
39 jamoCE32s(NULL), 56 jamoCE32s(NULL),
40 nfcImpl(nfc), 57 nfcImpl(nfc),
41 numericPrimary(0x12000000), 58 numericPrimary(0x12000000),
42 ce32sLength(0), cesLength(0), contextsLength(0), 59 ce32sLength(0), cesLength(0), contextsLength(0),
43 compressibleBytes(NULL), 60 compressibleBytes(NULL),
44 unsafeBackwardSet(NULL), 61 unsafeBackwardSet(NULL),
45 fastLatinTable(NULL), fastLatinTableLength(0), 62 fastLatinTable(NULL), fastLatinTableLength(0),
46 scripts(NULL), scriptsLength(0), 63 numScripts(0), scriptsIndex(NULL), scriptStarts(NULL), scriptStartsLength(0),
47 rootElements(NULL), rootElementsLength(0) {} 64 rootElements(NULL), rootElementsLength(0) {}
48 65
49 uint32_t getCE32(UChar32 c) const { 66 uint32_t getCE32(UChar32 c) const {
50 return UTRIE2_GET32(trie, c); 67 return UTRIE2_GET32(trie, c);
51 } 68 }
52 69
53 uint32_t getCE32FromSupplementary(UChar32 c) const { 70 uint32_t getCE32FromSupplementary(UChar32 c) const {
54 return UTRIE2_GET32_FROM_SUPP(trie, c); 71 return UTRIE2_GET32_FROM_SUPP(trie, c);
55 } 72 }
56 73
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
130 /** 147 /**
131 * Finds the reordering group which contains the primary weight. 148 * Finds the reordering group which contains the primary weight.
132 * @return the first script of the group, or -1 if the weight is beyond the last group 149 * @return the first script of the group, or -1 if the weight is beyond the last group
133 */ 150 */
134 int32_t getGroupForPrimary(uint32_t p) const; 151 int32_t getGroupForPrimary(uint32_t p) const;
135 152
136 int32_t getEquivalentScripts(int32_t script, 153 int32_t getEquivalentScripts(int32_t script,
137 int32_t dest[], int32_t capacity, UErrorCode &errorCode) const; 154 int32_t dest[], int32_t capacity, UErrorCode &errorCode) const;
138 155
139 /** 156 /**
140 * Writes the permutation table for the given reordering of scripts and groups, 157 * Writes the permutation of primary-weight ranges
141 * mapping from default-order primary-weight lead bytes to reordered lead bytes. 158 * for the given reordering of scripts and groups.
142 * The caller checks for illegal arguments and 159 * The caller checks for illegal arguments and
143 * takes care of [DEFAULT] and memory allocation. 160 * takes care of [DEFAULT] and memory allocation.
161 *
162 * Each list element will be a (limit, offset) pair as described
163 * for the CollationSettings::reorderRanges.
164 * The list will be empty if no ranges are reordered.
144 */ 165 */
145 void makeReorderTable(const int32_t *reorder, int32_t length, 166 void makeReorderRanges(const int32_t *reorder, int32_t length,
146 uint8_t table[256], UErrorCode &errorCode) const; 167 UVector32 &ranges, UErrorCode &errorCode) const;
147 168
148 /** @see jamoCE32s */ 169 /** @see jamoCE32s */
149 static const int32_t JAMO_CE32S_LENGTH = 19 + 21 + 27; 170 static const int32_t JAMO_CE32S_LENGTH = 19 + 21 + 27;
150 171
151 /** Main lookup trie. */ 172 /** Main lookup trie. */
152 const UTrie2 *trie; 173 const UTrie2 *trie;
153 /** 174 /**
154 * Array of CE32 values. 175 * Array of CE32 values.
155 * At index 0 there must be CE32(U+0000) 176 * At index 0 there must be CE32(U+0000)
156 * to support U+0000's special-tag for NUL-termination handling. 177 * to support U+0000's special-tag for NUL-termination handling.
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
188 * Fast Latin table for common-Latin-text string comparisons. 209 * Fast Latin table for common-Latin-text string comparisons.
189 * Data structure see class CollationFastLatin. 210 * Data structure see class CollationFastLatin.
190 */ 211 */
191 const uint16_t *fastLatinTable; 212 const uint16_t *fastLatinTable;
192 int32_t fastLatinTableLength; 213 int32_t fastLatinTableLength;
193 214
194 /** 215 /**
195 * Data for scripts and reordering groups. 216 * Data for scripts and reordering groups.
196 * Uses include building a reordering permutation table and 217 * Uses include building a reordering permutation table and
197 * providing script boundaries to AlphabeticIndex. 218 * providing script boundaries to AlphabeticIndex.
219 */
220 int32_t numScripts;
221 /**
222 * The length of scriptsIndex is numScripts+16.
223 * It maps from a UScriptCode or a special reorder code to an entry in scriptStarts.
224 * 16 special reorder codes (not all used) are mapped starting at numScripts.
225 * Up to MAX_NUM_SPECIAL_REORDER_CODES are codes for special groups like space/punct/digit.
226 * There are special codes at the end for reorder-reserved primary ranges.
198 * 227 *
199 * This data is a sorted list of primary-weight lead byte ranges (reordering groups), 228 * Multiple scripts may share a range and index, for example Hira & Kana.
200 * each with a list of pairs sorted in base collation order;
201 * each pair contains a script/reorder code and the lowest primary weight for that script.
202 *
203 * Data structure:
204 * - Each reordering group is encoded in n+2 16-bit integers.
205 * - First integer:
206 * Bits 15..8: First byte of the reordering group's range.
207 * Bits 7..0: Last byte of the reordering group's range.
208 * - Second integer:
209 * Length n of the list of script/reordering codes.
210 * - Each further integer is a script or reordering code.
211 */ 229 */
212 const uint16_t *scripts; 230 const uint16_t *scriptsIndex;
213 int32_t scriptsLength; 231 /**
232 * Start primary weight (top 16 bits only) for a group/script/reserved range
233 * indexed by scriptsIndex.
234 * The first range (separators & terminators) and the last range (trailing w eights)
235 * are not reorderable, and no scriptsIndex entry points to them.
236 */
237 const uint16_t *scriptStarts;
238 int32_t scriptStartsLength;
214 239
215 /** 240 /**
216 * Collation elements in the root collator. 241 * Collation elements in the root collator.
217 * Used by the CollationRootElements class. The data structure is described there. 242 * Used by the CollationRootElements class. The data structure is described there.
218 * NULL in a tailoring. 243 * NULL in a tailoring.
219 */ 244 */
220 const uint32_t *rootElements; 245 const uint32_t *rootElements;
221 int32_t rootElementsLength; 246 int32_t rootElementsLength;
222 247
223 private: 248 private:
224 int32_t findScript(int32_t script) const; 249 int32_t getScriptIndex(int32_t script) const;
250 void makeReorderRanges(const int32_t *reorder, int32_t length,
251 UBool latinMustMove,
252 UVector32 &ranges, UErrorCode &errorCode) const;
253 int32_t addLowScriptRange(uint8_t table[], int32_t index, int32_t lowStart) const;
254 int32_t addHighScriptRange(uint8_t table[], int32_t index, int32_t highLimit) const;
225 }; 255 };
226 256
227 U_NAMESPACE_END 257 U_NAMESPACE_END
228 258
229 #endif // !UCONFIG_NO_COLLATION 259 #endif // !UCONFIG_NO_COLLATION
230 #endif // __COLLATIONDATA_H__ 260 #endif // __COLLATIONDATA_H__
OLDNEW
« no previous file with comments | « source/i18n/collationcompare.cpp ('k') | source/i18n/collationdata.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698