Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(59)

Side by Side Diff: source/i18n/collationfastlatin.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/collationfastlatin.h ('k') | source/i18n/collationfastlatinbuilder.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * Copyright (C) 2013-2014, International Business Machines 3 * Copyright (C) 2013-2015, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************* 5 *******************************************************************************
6 * collationfastlatin.cpp 6 * collationfastlatin.cpp
7 * 7 *
8 * created on: 2013aug18 8 * created on: 2013aug18
9 * created by: Markus W. Scherer 9 * created by: Markus W. Scherer
10 */ 10 */
11 11
12 #include "unicode/utypes.h" 12 #include "unicode/utypes.h"
13 13
14 #if !UCONFIG_NO_COLLATION 14 #if !UCONFIG_NO_COLLATION
15 15
16 #include "unicode/ucol.h" 16 #include "unicode/ucol.h"
17 #include "collationdata.h" 17 #include "collationdata.h"
18 #include "collationfastlatin.h" 18 #include "collationfastlatin.h"
19 #include "collationsettings.h" 19 #include "collationsettings.h"
20 #include "putilimp.h" // U_ALIGN_CODE
21 #include "uassert.h" 20 #include "uassert.h"
22 21
23 U_NAMESPACE_BEGIN 22 U_NAMESPACE_BEGIN
24 23
25 int32_t 24 int32_t
26 CollationFastLatin::getOptions(const CollationData *data, const CollationSettings &settings, 25 CollationFastLatin::getOptions(const CollationData *data, const CollationSettings &settings,
27 uint16_t *primaries, int32_t capacity) { 26 uint16_t *primaries, int32_t capacity) {
28 const uint16_t *table = data->fastLatinTable; 27 const uint16_t *table = data->fastLatinTable;
29 if(table == NULL) { return -1; } 28 if(table == NULL) { return -1; }
30 U_ASSERT(capacity == LATIN_LIMIT); 29 U_ASSERT(capacity == LATIN_LIMIT);
31 if(capacity != LATIN_LIMIT) { return -1; } 30 if(capacity != LATIN_LIMIT) { return -1; }
32 31
33 uint32_t miniVarTop; 32 uint32_t miniVarTop;
34 if((settings.options & CollationSettings::ALTERNATE_MASK) == 0) { 33 if((settings.options & CollationSettings::ALTERNATE_MASK) == 0) {
35 // No mini primaries are variable, set a variableTop just below the 34 // No mini primaries are variable, set a variableTop just below the
36 // lowest long mini primary. 35 // lowest long mini primary.
37 miniVarTop = MIN_LONG - 1; 36 miniVarTop = MIN_LONG - 1;
38 } else { 37 } else {
39 uint32_t v1 = settings.variableTop >> 24;
40 int32_t headerLength = *table & 0xff; 38 int32_t headerLength = *table & 0xff;
41 int32_t i = headerLength - 1; 39 int32_t i = 1 + settings.getMaxVariable();
42 if(i <= 0 || v1 > (table[i] & 0x7fu)) { 40 if(i >= headerLength) {
43 return -1; // variableTop >= digits, should not occur 41 return -1; // variableTop >= digits, should not occur
44 } 42 }
45 while(i > 1 && v1 <= (table[i - 1] & 0x7fu)) { --i; } 43 miniVarTop = table[i];
46 // In the table header, the miniVarTop is in bits 15..7, with 4 zero bits 19..16 implied.
47 // Shift right to make it comparable with long mini primaries in bits 15..3.
48 miniVarTop = (table[i] & 0xff80) >> 4;
49 } 44 }
50 45
51 const uint8_t *reorderTable = settings.reorderTable; 46 UBool digitsAreReordered = FALSE;
52 if(reorderTable != NULL) { 47 if(settings.hasReordering()) {
53 const uint16_t *scripts = data->scripts; 48 uint32_t prevStart = 0;
54 int32_t length = data->scriptsLength; 49 uint32_t beforeDigitStart = 0;
55 uint32_t prevLastByte = 0; 50 uint32_t digitStart = 0;
56 for(int32_t i = 0; i < length;) { 51 uint32_t afterDigitStart = 0;
57 // reordered last byte of the group 52 for(int32_t group = UCOL_REORDER_CODE_FIRST;
58 uint32_t lastByte = reorderTable[scripts[i] & 0xff]; 53 group < UCOL_REORDER_CODE_FIRST + CollationData::MAX_NUM_SPECIAL_REORDER_CODES;
59 if(lastByte < prevLastByte) { 54 ++group) {
60 // The permutation affects the groups up to Latin. 55 uint32_t start = data->getFirstPrimaryForGroup(group);
61 return -1; 56 start = settings.reorder(start);
57 if(group == UCOL_REORDER_CODE_DIGIT) {
58 beforeDigitStart = prevStart;
59 digitStart = start;
60 } else if(start != 0) {
61 if(start < prevStart) {
62 // The permutation affects the groups up to Latin.
63 return -1;
64 }
65 // In the future, there might be a special group between digits & Latin.
66 if(digitStart != 0 && afterDigitStart == 0 && prevStart == beforeDigitStart) {
67 afterDigitStart = start;
68 }
69 prevStart = start;
62 } 70 }
63 if(scripts[i + 2] == USCRIPT_LATIN) { break; } 71 }
64 i = i + 2 + scripts[i + 1]; 72 uint32_t latinStart = data->getFirstPrimaryForGroup(USCRIPT_LATIN);
65 prevLastByte = lastByte; 73 latinStart = settings.reorder(latinStart);
74 if(latinStart < prevStart) {
75 return -1;
76 }
77 if(afterDigitStart == 0) {
78 afterDigitStart = latinStart;
79 }
80 if(!(beforeDigitStart < digitStart && digitStart < afterDigitStart)) {
81 digitsAreReordered = TRUE;
66 } 82 }
67 } 83 }
68 84
69 table += (table[0] & 0xff); // skip the header 85 table += (table[0] & 0xff); // skip the header
70 for(UChar32 c = 0; c < LATIN_LIMIT; ++c) { 86 for(UChar32 c = 0; c < LATIN_LIMIT; ++c) {
71 uint32_t p = table[c]; 87 uint32_t p = table[c];
72 if(p >= MIN_SHORT) { 88 if(p >= MIN_SHORT) {
73 p &= SHORT_PRIMARY_MASK; 89 p &= SHORT_PRIMARY_MASK;
74 } else if(p > miniVarTop) { 90 } else if(p > miniVarTop) {
75 p &= LONG_PRIMARY_MASK; 91 p &= LONG_PRIMARY_MASK;
76 } else { 92 } else {
77 p = 0; 93 p = 0;
78 } 94 }
79 primaries[c] = (uint16_t)p; 95 primaries[c] = (uint16_t)p;
80 } 96 }
81 if((settings.options & CollationSettings::NUMERIC) != 0) { 97 if(digitsAreReordered || (settings.options & CollationSettings::NUMERIC) != 0) {
82 // Bail out for digits. 98 // Bail out for digits.
83 for(UChar32 c = 0x30; c <= 0x39; ++c) { primaries[c] = 0; } 99 for(UChar32 c = 0x30; c <= 0x39; ++c) { primaries[c] = 0; }
84 } 100 }
85 101
86 // Shift the miniVarTop above other options. 102 // Shift the miniVarTop above other options.
87 return ((int32_t)miniVarTop << 16) | settings.options; 103 return ((int32_t)miniVarTop << 16) | settings.options;
88 } 104 }
89 105
90 int32_t 106 int32_t
91 CollationFastLatin::compareUTF16(const uint16_t *table, const uint16_t *primaries, int32_t options, 107 CollationFastLatin::compareUTF16(const uint16_t *table, const uint16_t *primaries, int32_t options,
92 const UChar *left, int32_t leftLength, 108 const UChar *left, int32_t leftLength,
93 const UChar *right, int32_t rightLength) { 109 const UChar *right, int32_t rightLength) {
94 // This is a modified copy of CollationCompare::compareUpToQuaternary(), 110 // This is a modified copy of CollationCompare::compareUpToQuaternary(),
95 // optimized for common Latin text. 111 // optimized for common Latin text.
96 // Keep them in sync! 112 // Keep them in sync!
97 // Keep compareUTF16() and compareUTF8() in sync very closely! 113 // Keep compareUTF16() and compareUTF8() in sync very closely!
98 114
99 U_ASSERT((table[0] >> 8) == VERSION); 115 U_ASSERT((table[0] >> 8) == VERSION);
100 table += (table[0] & 0xff); // skip the header 116 table += (table[0] & 0xff); // skip the header
101 uint32_t variableTop = (uint32_t)options >> 16; // see getOptions() 117 uint32_t variableTop = (uint32_t)options >> 16; // see getOptions()
102 options &= 0xffff; // needed for CollationSettings::getStrength() to work 118 options &= 0xffff; // needed for CollationSettings::getStrength() to work
103 119
104 // Check for supported characters, fetch mini CEs, and compare primaries. 120 // Check for supported characters, fetch mini CEs, and compare primaries.
105 U_ALIGN_CODE(16);
106 int32_t leftIndex = 0, rightIndex = 0; 121 int32_t leftIndex = 0, rightIndex = 0;
107 /** 122 /**
108 * Single mini CE or a pair. 123 * Single mini CE or a pair.
109 * The current mini CE is in the lower 16 bits, the next one is in the upper 16 bits. 124 * The current mini CE is in the lower 16 bits, the next one is in the upper 16 bits.
110 * If there is only one, then it is in the lower bits, and the upper bits are 0. 125 * If there is only one, then it is in the lower bits, and the upper bits are 0.
111 */ 126 */
112 uint32_t leftPair = 0, rightPair = 0; 127 uint32_t leftPair = 0, rightPair = 0;
113 for(;;) { 128 for(;;) {
114 // We fetch CEs until we get a non-ignorable primary or reach the end. 129 // We fetch CEs until we get a non-ignorable primary or reach the end.
115 while(leftPair == 0) { 130 while(leftPair == 0) {
(...skipping 316 matching lines...) Expand 10 before | Expand all | Expand 10 after
432 const uint8_t *left, int32_t leftLength, 447 const uint8_t *left, int32_t leftLength,
433 const uint8_t *right, int32_t rightLength) { 448 const uint8_t *right, int32_t rightLength) {
434 // Keep compareUTF16() and compareUTF8() in sync very closely! 449 // Keep compareUTF16() and compareUTF8() in sync very closely!
435 450
436 U_ASSERT((table[0] >> 8) == VERSION); 451 U_ASSERT((table[0] >> 8) == VERSION);
437 table += (table[0] & 0xff); // skip the header 452 table += (table[0] & 0xff); // skip the header
438 uint32_t variableTop = (uint32_t)options >> 16; // see RuleBasedCollator::getFastLatinOptions() 453 uint32_t variableTop = (uint32_t)options >> 16; // see RuleBasedCollator::getFastLatinOptions()
439 options &= 0xffff; // needed for CollationSettings::getStrength() to work 454 options &= 0xffff; // needed for CollationSettings::getStrength() to work
440 455
441 // Check for supported characters, fetch mini CEs, and compare primaries. 456 // Check for supported characters, fetch mini CEs, and compare primaries.
442 U_ALIGN_CODE(16);
443 int32_t leftIndex = 0, rightIndex = 0; 457 int32_t leftIndex = 0, rightIndex = 0;
444 /** 458 /**
445 * Single mini CE or a pair. 459 * Single mini CE or a pair.
446 * The current mini CE is in the lower 16 bits, the next one is in the upper 16 bits. 460 * The current mini CE is in the lower 16 bits, the next one is in the upper 16 bits.
447 * If there is only one, then it is in the lower bits, and the upper bits are 0. 461 * If there is only one, then it is in the lower bits, and the upper bits are 0.
448 */ 462 */
449 uint32_t leftPair = 0, rightPair = 0; 463 uint32_t leftPair = 0, rightPair = 0;
450 // Note: There is no need to assemble the code point. 464 // Note: There is no need to assemble the code point.
451 // We only need to look up the table entry for the character, 465 // We only need to look up the table entry for the character,
452 // and nextPair() looks for whether c==0. 466 // and nextPair() looks for whether c==0.
(...skipping 621 matching lines...) Expand 10 before | Expand all | Expand 10 after
1074 U_ASSERT(ce >= MIN_LONG); 1088 U_ASSERT(ce >= MIN_LONG);
1075 pair &= TWO_LONG_PRIMARIES_MASK; // variable 1089 pair &= TWO_LONG_PRIMARIES_MASK; // variable
1076 } 1090 }
1077 } 1091 }
1078 return pair; 1092 return pair;
1079 } 1093 }
1080 1094
1081 U_NAMESPACE_END 1095 U_NAMESPACE_END
1082 1096
1083 #endif // !UCONFIG_NO_COLLATION 1097 #endif // !UCONFIG_NO_COLLATION
OLDNEW
« no previous file with comments | « source/i18n/collationfastlatin.h ('k') | source/i18n/collationfastlatinbuilder.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698