Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Side by Side Diff: source/i18n/identifier_info.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/identifier_info.h ('k') | source/i18n/indiancal.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ********************************************************************** 2 **********************************************************************
3 * Copyright (C) 2012-2013, International Business Machines 3 * Copyright (C) 2012-2014, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ********************************************************************** 5 **********************************************************************
6 */ 6 */
7 7
8 #include "unicode/utypes.h" 8 #include "unicode/utypes.h"
9 9
10 #include "unicode/uchar.h" 10 #include "unicode/uchar.h"
11 #include "unicode/utf16.h" 11 #include "unicode/utf16.h"
12 12
13 #include "identifier_info.h" 13 #include "identifier_info.h"
14 #include "mutex.h" 14 #include "mutex.h"
15 #include "scriptset.h" 15 #include "scriptset.h"
16 #include "ucln_in.h" 16 #include "ucln_in.h"
17 #include "uvector.h" 17 #include "uvector.h"
18 18
19 U_NAMESPACE_BEGIN 19 U_NAMESPACE_BEGIN
20 20
21 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 21 static UnicodeSet *ASCII;
22 static ScriptSet *JAPANESE;
23 static ScriptSet *CHINESE;
24 static ScriptSet *KOREAN;
25 static ScriptSet *CONFUSABLE_WITH_LATIN;
26 static UInitOnce gIdentifierInfoInitOnce = U_INITONCE_INITIALIZER;
22 27
23 static UMutex gInitMutex = U_MUTEX_INITIALIZER;
24 static UBool gStaticsAreInitialized = FALSE;
25 28
26 UnicodeSet *IdentifierInfo::ASCII; 29 U_CDECL_BEGIN
27 ScriptSet *IdentifierInfo::JAPANESE; 30 static UBool U_CALLCONV
28 ScriptSet *IdentifierInfo::CHINESE; 31 IdentifierInfo_cleanup(void) {
29 ScriptSet *IdentifierInfo::KOREAN;
30 ScriptSet *IdentifierInfo::CONFUSABLE_WITH_LATIN;
31
32 UBool IdentifierInfo::cleanup() {
33 delete ASCII; 32 delete ASCII;
34 ASCII = NULL; 33 ASCII = NULL;
35 delete JAPANESE; 34 delete JAPANESE;
36 JAPANESE = NULL; 35 JAPANESE = NULL;
37 delete CHINESE; 36 delete CHINESE;
38 CHINESE = NULL; 37 CHINESE = NULL;
39 delete KOREAN; 38 delete KOREAN;
40 KOREAN = NULL; 39 KOREAN = NULL;
41 delete CONFUSABLE_WITH_LATIN; 40 delete CONFUSABLE_WITH_LATIN;
42 CONFUSABLE_WITH_LATIN = NULL; 41 CONFUSABLE_WITH_LATIN = NULL;
43 gStaticsAreInitialized = FALSE; 42 gIdentifierInfoInitOnce.reset();
44 return TRUE; 43 return TRUE;
45 } 44 }
46 45
47 U_CDECL_BEGIN 46 static void U_CALLCONV
48 static UBool U_CALLCONV 47 IdentifierInfo_init(UErrorCode &status) {
49 IdentifierInfo_cleanup(void) { 48 ASCII = new UnicodeSet(0, 0x7f);
50 return IdentifierInfo::cleanup(); 49 JAPANESE = new ScriptSet();
50 CHINESE = new ScriptSet();
51 KOREAN = new ScriptSet();
52 CONFUSABLE_WITH_LATIN = new ScriptSet();
53 if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL
54 || CONFUSABLE_WITH_LATIN == NULL) {
55 status = U_MEMORY_ALLOCATION_ERROR;
56 return;
57 }
58 ASCII->freeze();
59 JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status)
60 .set(USCRIPT_KATAKANA, status);
61 CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status);
62 KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status);
63 CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status)
64 .set(USCRIPT_CHEROKEE, status);
65 ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup);
51 } 66 }
52 U_CDECL_END 67 U_CDECL_END
53 68
54 69
55 IdentifierInfo::IdentifierInfo(UErrorCode &status): 70 IdentifierInfo::IdentifierInfo(UErrorCode &status):
56 fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL), 71 fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL),
57 fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) { 72 fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) {
73 umtx_initOnce(gIdentifierInfoInitOnce, &IdentifierInfo_init, status);
58 if (U_FAILURE(status)) { 74 if (U_FAILURE(status)) {
59 return; 75 return;
60 } 76 }
61 { 77
62 Mutex lock(&gInitMutex);
63 if (!gStaticsAreInitialized) {
64 ASCII = new UnicodeSet(0, 0x7f);
65 JAPANESE = new ScriptSet();
66 CHINESE = new ScriptSet();
67 KOREAN = new ScriptSet();
68 CONFUSABLE_WITH_LATIN = new ScriptSet();
69 if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL
70 || CONFUSABLE_WITH_LATIN == NULL) {
71 status = U_MEMORY_ALLOCATION_ERROR;
72 return;
73 }
74 ASCII->freeze();
75 JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status)
76 .set(USCRIPT_KATAKANA, status);
77 CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status);
78 KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status);
79 CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status)
80 .set(USCRIPT_CHEROKEE, status);
81 ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup);
82 gStaticsAreInitialized = TRUE;
83 }
84 }
85 fIdentifier = new UnicodeString(); 78 fIdentifier = new UnicodeString();
86 fRequiredScripts = new ScriptSet(); 79 fRequiredScripts = new ScriptSet();
87 fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status); 80 fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status);
88 uhash_setKeyDeleter(fScriptSetSet, uhash_deleteScriptSet); 81 uhash_setKeyDeleter(fScriptSetSet, uhash_deleteScriptSet);
89 fCommonAmongAlternates = new ScriptSet(); 82 fCommonAmongAlternates = new ScriptSet();
90 fNumerics = new UnicodeSet(); 83 fNumerics = new UnicodeSet();
91 fIdentifierProfile = new UnicodeSet(0, 0x10FFFF); 84 fIdentifierProfile = new UnicodeSet(0, 0x10FFFF);
92 85
93 if (U_SUCCESS(status) && (fIdentifier == NULL || fRequiredScripts == NULL || fScriptSetSet == NULL || 86 if (U_SUCCESS(status) && (fIdentifier == NULL || fRequiredScripts == NULL || fScriptSetSet == NULL ||
94 fCommonAmongAlternates == NULL || fNumerics == NULL || fIdentifierProfile == NULL)) { 87 fCommonAmongAlternates == NULL || fNumerics == NULL || fIdentifierProfile == NULL)) {
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
135 ScriptSet scriptsForCP; 128 ScriptSet scriptsForCP;
136 UChar32 cp; 129 UChar32 cp;
137 for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) { 130 for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) {
138 cp = identifier.char32At(i); 131 cp = identifier.char32At(i);
139 // Store a representative character for each kind of decimal digit 132 // Store a representative character for each kind of decimal digit
140 if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) { 133 if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) {
141 // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value 134 // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value
142 fNumerics->add(cp - (UChar32)u_getNumericValue(cp)); 135 fNumerics->add(cp - (UChar32)u_getNumericValue(cp));
143 } 136 }
144 UScriptCode extensions[500]; 137 UScriptCode extensions[500];
145 int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, LENGTHOF(extensions), &status); 138 int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, UPRV_LENGTHOF(extensions), &status);
146 if (U_FAILURE(status)) { 139 if (U_FAILURE(status)) {
147 return *this; 140 return *this;
148 } 141 }
149 scriptsForCP.resetAll(); 142 scriptsForCP.resetAll();
150 for (int32_t j=0; j<extensionsCount; j++) { 143 for (int32_t j=0; j<extensionsCount; j++) {
151 scriptsForCP.set(extensions[j], status); 144 scriptsForCP.set(extensions[j], status);
152 } 145 }
153 scriptsForCP.reset(USCRIPT_COMMON, status); 146 scriptsForCP.reset(USCRIPT_COMMON, status);
154 scriptsForCP.reset(USCRIPT_INHERITED, status); 147 scriptsForCP.reset(USCRIPT_INHERITED, status);
155 switch (scriptsForCP.countMembers()) { 148 switch (scriptsForCP.countMembers()) {
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
240 // This is a bit tricky. We look at a number of factors. 233 // This is a bit tricky. We look at a number of factors.
241 // The number of scripts in the text. 234 // The number of scripts in the text.
242 // Plus 1 if there is some commonality among the alternates (eg [Arab Thaa]; [Arab Syrc]) 235 // Plus 1 if there is some commonality among the alternates (eg [Arab Thaa]; [Arab Syrc])
243 // Plus number of alternates otherwise (this only works because we only test cardinality up to 2.) 236 // Plus number of alternates otherwise (this only works because we only test cardinality up to 2.)
244 237
245 // Note: the requiredScripts set omits COMMON and INHERITED; they are taken out at the 238 // Note: the requiredScripts set omits COMMON and INHERITED; they are taken out at the
246 // time it is created, in setIdentifier(). 239 // time it is created, in setIdentifier().
247 int32_t cardinalityPlus = fRequiredScripts->countMembers() + 240 int32_t cardinalityPlus = fRequiredScripts->countMembers() +
248 (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1); 241 (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1);
249 if (cardinalityPlus < 2) { 242 if (cardinalityPlus < 2) {
250 return USPOOF_HIGHLY_RESTRICTIVE; 243 return USPOOF_SINGLE_SCRIPT_RESTRICTIVE;
251 } 244 }
252 if (containsWithAlternates(*JAPANESE, *fRequiredScripts) || containsWithAlternates(*CHINESE, *fRequiredScripts) 245 if (containsWithAlternates(*JAPANESE, *fRequiredScripts) || containsWithAlternates(*CHINESE, *fRequiredScripts)
253 || containsWithAlternates(*KOREAN, *fRequiredScripts)) { 246 || containsWithAlternates(*KOREAN, *fRequiredScripts)) {
254 return USPOOF_HIGHLY_RESTRICTIVE; 247 return USPOOF_HIGHLY_RESTRICTIVE;
255 } 248 }
256 if (cardinalityPlus == 2 && 249 if (cardinalityPlus == 2 &&
257 fRequiredScripts->test(USCRIPT_LATIN, status) && 250 fRequiredScripts->test(USCRIPT_LATIN, status) &&
258 !fRequiredScripts->intersects(*CONFUSABLE_WITH_LATIN)) { 251 !fRequiredScripts->intersects(*CONFUSABLE_WITH_LATIN)) {
259 return USPOOF_MODERATELY_RESTRICTIVE; 252 return USPOOF_MODERATELY_RESTRICTIVE;
260 } 253 }
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
309 dest.append(separator); 302 dest.append(separator);
310 } 303 }
311 ScriptSet *ss = static_cast<ScriptSet *>(sorted.elementAt(i)); 304 ScriptSet *ss = static_cast<ScriptSet *>(sorted.elementAt(i));
312 ss->displayScripts(dest); 305 ss->displayScripts(dest);
313 } 306 }
314 return dest; 307 return dest;
315 } 308 }
316 309
317 U_NAMESPACE_END 310 U_NAMESPACE_END
318 311
OLDNEW
« no previous file with comments | « source/i18n/identifier_info.h ('k') | source/i18n/indiancal.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698