Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(66)

Side by Side Diff: source/i18n/collationfastlatin.h

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/collationdatawriter.cpp ('k') | source/i18n/collationfastlatin.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * Copyright (C) 2013-2014, International Business Machines 3 * Copyright (C) 2013-2015, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************* 5 *******************************************************************************
6 * collationfastlatin.h 6 * collationfastlatin.h
7 * 7 *
8 * created on: 2013aug09 8 * created on: 2013aug09
9 * created by: Markus W. Scherer 9 * created by: Markus W. Scherer
10 */ 10 */
11 11
12 #ifndef __COLLATIONFASTLATIN_H__ 12 #ifndef __COLLATIONFASTLATIN_H__
13 #define __COLLATIONFASTLATIN_H__ 13 #define __COLLATIONFASTLATIN_H__
(...skipping 10 matching lines...) Expand all
24 class U_I18N_API CollationFastLatin /* all static */ { 24 class U_I18N_API CollationFastLatin /* all static */ {
25 public: 25 public:
26 /** 26 /**
27 * Fast Latin format version (one byte 1..FF). 27 * Fast Latin format version (one byte 1..FF).
28 * Must be incremented for any runtime-incompatible changes, 28 * Must be incremented for any runtime-incompatible changes,
29 * in particular, for changes to any of the following constants. 29 * in particular, for changes to any of the following constants.
30 * 30 *
31 * When the major version number of the main data format changes, 31 * When the major version number of the main data format changes,
32 * we can reset this fast Latin version to 1. 32 * we can reset this fast Latin version to 1.
33 */ 33 */
34 static const uint16_t VERSION = 1; 34 static const uint16_t VERSION = 2;
35 35
36 static const int32_t LATIN_MAX = 0x17f; 36 static const int32_t LATIN_MAX = 0x17f;
37 static const int32_t LATIN_LIMIT = LATIN_MAX + 1; 37 static const int32_t LATIN_LIMIT = LATIN_MAX + 1;
38 38
39 static const int32_t LATIN_MAX_UTF8_LEAD = 0xc5; // UTF-8 lead byte of LATIN_MAX 39 static const int32_t LATIN_MAX_UTF8_LEAD = 0xc5; // UTF-8 lead byte of LATIN_MAX
40 40
41 static const int32_t PUNCT_START = 0x2000; 41 static const int32_t PUNCT_START = 0x2000;
42 static const int32_t PUNCT_LIMIT = 0x2040; 42 static const int32_t PUNCT_LIMIT = 0x2040;
43 43
44 // excludes U+FFFE & U+FFFF 44 // excludes U+FFFE & U+FFFF
(...skipping 200 matching lines...) Expand 10 before | Expand all | Expand 10 after
245 static uint32_t getCases(uint32_t variableTop, UBool strengthIsPrimary, uint32_t pair); 245 static uint32_t getCases(uint32_t variableTop, UBool strengthIsPrimary, uint32_t pair);
246 static uint32_t getTertiaries(uint32_t variableTop, UBool withCaseBits, uint32_t pair); 246 static uint32_t getTertiaries(uint32_t variableTop, UBool withCaseBits, uint32_t pair);
247 static uint32_t getQuaternaries(uint32_t variableTop, uint32_t pair); 247 static uint32_t getQuaternaries(uint32_t variableTop, uint32_t pair);
248 248
249 private: 249 private:
250 CollationFastLatin(); // no constructor 250 CollationFastLatin(); // no constructor
251 }; 251 };
252 252
253 /* 253 /*
254 * Format of the CollationFastLatin data table. 254 * Format of the CollationFastLatin data table.
255 * CollationFastLatin::VERSION = 1. 255 * CollationFastLatin::VERSION = 2.
256 * 256 *
257 * This table contains data for a Latin-text collation fastpath. 257 * This table contains data for a Latin-text collation fastpath.
258 * The data is stored as an array of uint16_t which contains the following parts. 258 * The data is stored as an array of uint16_t which contains the following parts.
259 * 259 *
260 * uint16_t -- version & header length 260 * uint16_t -- version & header length
261 * Bits 15..8: version, must match the VERSION 261 * Bits 15..8: version, must match the VERSION
262 * 7..0: length of the header 262 * 7..0: length of the header
263 * 263 *
264 * uint16_t varTops[header length - 1] 264 * uint16_t varTops[header length - 1]
265 * Version 2:
266 * varTops[m] is the highest CollationFastLatin long-primary weight
267 * of supported maxVariable group m
268 * (special reorder group space, punct, symbol, currency).
269 *
270 * Version 1:
265 * Each of these values maps the variable top lead byte of a supported maxVariable group 271 * Each of these values maps the variable top lead byte of a supported maxVariable group
266 * to the highest CollationFastLatin long-primary weight. 272 * to the highest CollationFastLatin long-primary weight.
267 * The values are stored in ascending order. 273 * The values are stored in ascending order.
268 * Bits 15..7: max fast-Latin long-primary weight (bits 11..3 shifted left by 4 bits) 274 * Bits 15..7: max fast-Latin long-primary weight (bits 11..3 shifted left by 4 bits)
269 * 6..0: regular primary lead byte 275 * 6..0: regular primary lead byte
270 * 276 *
271 * uint16_t miniCEs[0x1c0] 277 * uint16_t miniCEs[0x1c0]
272 * A mini collation element for each character U+0000..U+017F and U+2000..U+203F. 278 * A mini collation element for each character U+0000..U+017F and U+2000..U+203F.
273 * Each value encodes one or two mini CEs (two are possible if the first one 279 * Each value encodes one or two mini CEs (two are possible if the first one
274 * has a short mini primary and the second one is a secondary CE, i.e., primary == 0), 280 * has a short mini primary and the second one is a secondary CE, i.e., primary == 0),
(...skipping 11 matching lines...) Expand all
286 * uint16_t contractions[variable length]; 292 * uint16_t contractions[variable length];
287 * Contraction mini CEs contain an offset relative to just after the miniCEs table. 293 * Contraction mini CEs contain an offset relative to just after the miniCEs table.
288 * It points to a list of tuples which map from a contraction suffix character to a result. 294 * It points to a list of tuples which map from a contraction suffix character to a result.
289 * First uint16_t of each tuple: 295 * First uint16_t of each tuple:
290 * Bits 10..9: Length of the result (1..3), see comments on CONTR_LENGTH_SHIFT. 296 * Bits 10..9: Length of the result (1..3), see comments on CONTR_LENGTH_SHIFT.
291 * Bits 8..0: Contraction character, see comments on CONTR_CHAR_MASK. 297 * Bits 8..0: Contraction character, see comments on CONTR_CHAR_MASK.
292 * This is followed by 0, 1, or 2 uint16_t according to the length. 298 * This is followed by 0, 1, or 2 uint16_t according to the length.
293 * Each list is terminated by an entry with CONTR_CHAR_MASK. 299 * Each list is terminated by an entry with CONTR_CHAR_MASK.
294 * Each list starts with such an entry which also contains the default result 300 * Each list starts with such an entry which also contains the default result
295 * for when there is no contraction match. 301 * for when there is no contraction match.
302 *
303 * -----------------
304 * Changes for version 2 (ICU 55)
305 *
306 * Special reorder groups do not necessarily start on whole primary lead bytes any more.
307 * Therefore, the varTops data has a new format:
308 * Version 1 stored the lead bytes of the highest root primaries for
309 * the maxVariable-supported special reorder groups.
310 * Now the top 16 bits would need to be stored,
311 * and it is simpler to store only the fast-Latin weights.
296 */ 312 */
297 313
298 U_NAMESPACE_END 314 U_NAMESPACE_END
299 315
300 #endif // !UCONFIG_NO_COLLATION 316 #endif // !UCONFIG_NO_COLLATION
301 #endif // __COLLATIONFASTLATIN_H__ 317 #endif // __COLLATIONFASTLATIN_H__
OLDNEW
« no previous file with comments | « source/i18n/collationdatawriter.cpp ('k') | source/i18n/collationfastlatin.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698