Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(260)

Side by Side Diff: source/i18n/collationdata.h

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/collationcompare.cpp ('k') | source/i18n/collationdata.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 *******************************************************************************
3 * Copyright (C) 2010-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * collationdata.h
7 *
8 * created on: 2010oct27
9 * created by: Markus W. Scherer
10 */
11
12 #ifndef __COLLATIONDATA_H__
13 #define __COLLATIONDATA_H__
14
15 #include "unicode/utypes.h"
16
17 #if !UCONFIG_NO_COLLATION
18
19 #include "unicode/uniset.h"
20 #include "collation.h"
21 #include "normalizer2impl.h"
22 #include "utrie2.h"
23
24 struct UDataMemory;
25
26 U_NAMESPACE_BEGIN
27
28 /**
29 * Collation data container.
30 * Immutable data created by a CollationDataBuilder, or loaded from a file,
31 * or deserialized from API-provided binary data.
32 *
33 * Includes data for the collation base (root/default), aliased if this is not t he base.
34 */
35 struct U_I18N_API CollationData : public UMemory {
36 CollationData(const Normalizer2Impl &nfc)
37 : trie(NULL),
38 ce32s(NULL), ces(NULL), contexts(NULL), base(NULL),
39 jamoCE32s(NULL),
40 nfcImpl(nfc),
41 numericPrimary(0x12000000),
42 ce32sLength(0), cesLength(0), contextsLength(0),
43 compressibleBytes(NULL),
44 unsafeBackwardSet(NULL),
45 fastLatinTable(NULL), fastLatinTableLength(0),
46 scripts(NULL), scriptsLength(0),
47 rootElements(NULL), rootElementsLength(0) {}
48
49 uint32_t getCE32(UChar32 c) const {
50 return UTRIE2_GET32(trie, c);
51 }
52
53 uint32_t getCE32FromSupplementary(UChar32 c) const {
54 return UTRIE2_GET32_FROM_SUPP(trie, c);
55 }
56
57 UBool isDigit(UChar32 c) const {
58 return c < 0x660 ? c <= 0x39 && 0x30 <= c :
59 Collation::hasCE32Tag(getCE32(c), Collation::DIGIT_TAG);
60 }
61
62 UBool isUnsafeBackward(UChar32 c, UBool numeric) const {
63 return unsafeBackwardSet->contains(c) || (numeric && isDigit(c));
64 }
65
66 UBool isCompressibleLeadByte(uint32_t b) const {
67 return compressibleBytes[b];
68 }
69
70 inline UBool isCompressiblePrimary(uint32_t p) const {
71 return isCompressibleLeadByte(p >> 24);
72 }
73
74 /**
75 * Returns the CE32 from two contexts words.
76 * Access to the defaultCE32 for contraction and prefix matching.
77 */
78 static uint32_t readCE32(const UChar *p) {
79 return ((uint32_t)p[0] << 16) | p[1];
80 }
81
82 /**
83 * Returns the CE32 for an indirect special CE32 (e.g., with DIGIT_TAG).
84 * Requires that ce32 is special.
85 */
86 uint32_t getIndirectCE32(uint32_t ce32) const;
87 /**
88 * Returns the CE32 for an indirect special CE32 (e.g., with DIGIT_TAG),
89 * if ce32 is special.
90 */
91 uint32_t getFinalCE32(uint32_t ce32) const;
92
93 /**
94 * Computes a CE from c's ce32 which has the OFFSET_TAG.
95 */
96 int64_t getCEFromOffsetCE32(UChar32 c, uint32_t ce32) const {
97 int64_t dataCE = ces[Collation::indexFromCE32(ce32)];
98 return Collation::makeCE(Collation::getThreeBytePrimaryForOffsetData(c, dataCE));
99 }
100
101 /**
102 * Returns the single CE that c maps to.
103 * Sets U_UNSUPPORTED_ERROR if c does not map to a single CE.
104 */
105 int64_t getSingleCE(UChar32 c, UErrorCode &errorCode) const;
106
107 /**
108 * Returns the FCD16 value for code point c. c must be >= 0.
109 */
110 uint16_t getFCD16(UChar32 c) const {
111 return nfcImpl.getFCD16(c);
112 }
113
114 /**
115 * Returns the first primary for the script's reordering group.
116 * @return the primary with only the first primary lead byte of the group
117 * (not necessarily an actual root collator primary weight),
118 * or 0 if the script is unknown
119 */
120 uint32_t getFirstPrimaryForGroup(int32_t script) const;
121
122 /**
123 * Returns the last primary for the script's reordering group.
124 * @return the last primary of the group
125 * (not an actual root collator primary weight),
126 * or 0 if the script is unknown
127 */
128 uint32_t getLastPrimaryForGroup(int32_t script) const;
129
130 /**
131 * Finds the reordering group which contains the primary weight.
132 * @return the first script of the group, or -1 if the weight is beyond the last group
133 */
134 int32_t getGroupForPrimary(uint32_t p) const;
135
136 int32_t getEquivalentScripts(int32_t script,
137 int32_t dest[], int32_t capacity, UErrorCode &e rrorCode) const;
138
139 /**
140 * Writes the permutation table for the given reordering of scripts and grou ps,
141 * mapping from default-order primary-weight lead bytes to reordered lead by tes.
142 * The caller checks for illegal arguments and
143 * takes care of [DEFAULT] and memory allocation.
144 */
145 void makeReorderTable(const int32_t *reorder, int32_t length,
146 uint8_t table[256], UErrorCode &errorCode) const;
147
148 /** @see jamoCE32s */
149 static const int32_t JAMO_CE32S_LENGTH = 19 + 21 + 27;
150
151 /** Main lookup trie. */
152 const UTrie2 *trie;
153 /**
154 * Array of CE32 values.
155 * At index 0 there must be CE32(U+0000)
156 * to support U+0000's special-tag for NUL-termination handling.
157 */
158 const uint32_t *ce32s;
159 /** Array of CE values for expansions and OFFSET_TAG. */
160 const int64_t *ces;
161 /** Array of prefix and contraction-suffix matching data. */
162 const UChar *contexts;
163 /** Base collation data, or NULL if this data itself is a base. */
164 const CollationData *base;
165 /**
166 * Simple array of JAMO_CE32S_LENGTH=19+21+27 CE32s, one per canonical Jamo L/V/T.
167 * They are normally simple CE32s, rarely expansions.
168 * For fast handling of HANGUL_TAG.
169 */
170 const uint32_t *jamoCE32s;
171 const Normalizer2Impl &nfcImpl;
172 /** The single-byte primary weight (xx000000) for numeric collation. */
173 uint32_t numericPrimary;
174
175 int32_t ce32sLength;
176 int32_t cesLength;
177 int32_t contextsLength;
178
179 /** 256 flags for which primary-weight lead bytes are compressible. */
180 const UBool *compressibleBytes;
181 /**
182 * Set of code points that are unsafe for starting string comparison after a n identical prefix,
183 * or in backwards CE iteration.
184 */
185 const UnicodeSet *unsafeBackwardSet;
186
187 /**
188 * Fast Latin table for common-Latin-text string comparisons.
189 * Data structure see class CollationFastLatin.
190 */
191 const uint16_t *fastLatinTable;
192 int32_t fastLatinTableLength;
193
194 /**
195 * Data for scripts and reordering groups.
196 * Uses include building a reordering permutation table and
197 * providing script boundaries to AlphabeticIndex.
198 *
199 * This data is a sorted list of primary-weight lead byte ranges (reordering groups),
200 * each with a list of pairs sorted in base collation order;
201 * each pair contains a script/reorder code and the lowest primary weight fo r that script.
202 *
203 * Data structure:
204 * - Each reordering group is encoded in n+2 16-bit integers.
205 * - First integer:
206 * Bits 15..8: First byte of the reordering group's range.
207 * Bits 7..0: Last byte of the reordering group's range.
208 * - Second integer:
209 * Length n of the list of script/reordering codes.
210 * - Each further integer is a script or reordering code.
211 */
212 const uint16_t *scripts;
213 int32_t scriptsLength;
214
215 /**
216 * Collation elements in the root collator.
217 * Used by the CollationRootElements class. The data structure is described there.
218 * NULL in a tailoring.
219 */
220 const uint32_t *rootElements;
221 int32_t rootElementsLength;
222
223 private:
224 int32_t findScript(int32_t script) const;
225 };
226
227 U_NAMESPACE_END
228
229 #endif // !UCONFIG_NO_COLLATION
230 #endif // __COLLATIONDATA_H__
OLDNEW
« no previous file with comments | « source/i18n/collationcompare.cpp ('k') | source/i18n/collationdata.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698