| OLD | NEW |
| 1 /* | 1 /* |
| 2 ********************************************************************** | 2 ********************************************************************** |
| 3 * Copyright (C) 2012-2013, International Business Machines | 3 * Copyright (C) 2012-2014, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** | 5 ********************************************************************** |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "unicode/utypes.h" | 8 #include "unicode/utypes.h" |
| 9 | 9 |
| 10 #include "unicode/uchar.h" | 10 #include "unicode/uchar.h" |
| 11 #include "unicode/utf16.h" | 11 #include "unicode/utf16.h" |
| 12 | 12 |
| 13 #include "identifier_info.h" | 13 #include "identifier_info.h" |
| 14 #include "mutex.h" | 14 #include "mutex.h" |
| 15 #include "scriptset.h" | 15 #include "scriptset.h" |
| 16 #include "ucln_in.h" | 16 #include "ucln_in.h" |
| 17 #include "uvector.h" | 17 #include "uvector.h" |
| 18 | 18 |
| 19 U_NAMESPACE_BEGIN | 19 U_NAMESPACE_BEGIN |
| 20 | 20 |
| 21 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | 21 static UnicodeSet *ASCII; |
| 22 static ScriptSet *JAPANESE; |
| 23 static ScriptSet *CHINESE; |
| 24 static ScriptSet *KOREAN; |
| 25 static ScriptSet *CONFUSABLE_WITH_LATIN; |
| 26 static UInitOnce gIdentifierInfoInitOnce = U_INITONCE_INITIALIZER; |
| 22 | 27 |
| 23 static UMutex gInitMutex = U_MUTEX_INITIALIZER; | |
| 24 static UBool gStaticsAreInitialized = FALSE; | |
| 25 | 28 |
| 26 UnicodeSet *IdentifierInfo::ASCII; | 29 U_CDECL_BEGIN |
| 27 ScriptSet *IdentifierInfo::JAPANESE; | 30 static UBool U_CALLCONV |
| 28 ScriptSet *IdentifierInfo::CHINESE; | 31 IdentifierInfo_cleanup(void) { |
| 29 ScriptSet *IdentifierInfo::KOREAN; | |
| 30 ScriptSet *IdentifierInfo::CONFUSABLE_WITH_LATIN; | |
| 31 | |
| 32 UBool IdentifierInfo::cleanup() { | |
| 33 delete ASCII; | 32 delete ASCII; |
| 34 ASCII = NULL; | 33 ASCII = NULL; |
| 35 delete JAPANESE; | 34 delete JAPANESE; |
| 36 JAPANESE = NULL; | 35 JAPANESE = NULL; |
| 37 delete CHINESE; | 36 delete CHINESE; |
| 38 CHINESE = NULL; | 37 CHINESE = NULL; |
| 39 delete KOREAN; | 38 delete KOREAN; |
| 40 KOREAN = NULL; | 39 KOREAN = NULL; |
| 41 delete CONFUSABLE_WITH_LATIN; | 40 delete CONFUSABLE_WITH_LATIN; |
| 42 CONFUSABLE_WITH_LATIN = NULL; | 41 CONFUSABLE_WITH_LATIN = NULL; |
| 43 gStaticsAreInitialized = FALSE; | 42 gIdentifierInfoInitOnce.reset(); |
| 44 return TRUE; | 43 return TRUE; |
| 45 } | 44 } |
| 46 | 45 |
| 47 U_CDECL_BEGIN | 46 static void U_CALLCONV |
| 48 static UBool U_CALLCONV | 47 IdentifierInfo_init(UErrorCode &status) { |
| 49 IdentifierInfo_cleanup(void) { | 48 ASCII = new UnicodeSet(0, 0x7f); |
| 50 return IdentifierInfo::cleanup(); | 49 JAPANESE = new ScriptSet(); |
| 50 CHINESE = new ScriptSet(); |
| 51 KOREAN = new ScriptSet(); |
| 52 CONFUSABLE_WITH_LATIN = new ScriptSet(); |
| 53 if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL |
| 54 || CONFUSABLE_WITH_LATIN == NULL) { |
| 55 status = U_MEMORY_ALLOCATION_ERROR; |
| 56 return; |
| 57 } |
| 58 ASCII->freeze(); |
| 59 JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status) |
| 60 .set(USCRIPT_KATAKANA, status); |
| 61 CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status); |
| 62 KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status); |
| 63 CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status) |
| 64 .set(USCRIPT_CHEROKEE, status); |
| 65 ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup); |
| 51 } | 66 } |
| 52 U_CDECL_END | 67 U_CDECL_END |
| 53 | 68 |
| 54 | 69 |
| 55 IdentifierInfo::IdentifierInfo(UErrorCode &status): | 70 IdentifierInfo::IdentifierInfo(UErrorCode &status): |
| 56 fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL), | 71 fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL), |
| 57 fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) { | 72 fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) { |
| 73 umtx_initOnce(gIdentifierInfoInitOnce, &IdentifierInfo_init, status); |
| 58 if (U_FAILURE(status)) { | 74 if (U_FAILURE(status)) { |
| 59 return; | 75 return; |
| 60 } | 76 } |
| 61 { | 77 |
| 62 Mutex lock(&gInitMutex); | |
| 63 if (!gStaticsAreInitialized) { | |
| 64 ASCII = new UnicodeSet(0, 0x7f); | |
| 65 JAPANESE = new ScriptSet(); | |
| 66 CHINESE = new ScriptSet(); | |
| 67 KOREAN = new ScriptSet(); | |
| 68 CONFUSABLE_WITH_LATIN = new ScriptSet(); | |
| 69 if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL | |
| 70 || CONFUSABLE_WITH_LATIN == NULL) { | |
| 71 status = U_MEMORY_ALLOCATION_ERROR; | |
| 72 return; | |
| 73 } | |
| 74 ASCII->freeze(); | |
| 75 JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status) | |
| 76 .set(USCRIPT_KATAKANA, status); | |
| 77 CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status); | |
| 78 KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status); | |
| 79 CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status) | |
| 80 .set(USCRIPT_CHEROKEE, status); | |
| 81 ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup); | |
| 82 gStaticsAreInitialized = TRUE; | |
| 83 } | |
| 84 } | |
| 85 fIdentifier = new UnicodeString(); | 78 fIdentifier = new UnicodeString(); |
| 86 fRequiredScripts = new ScriptSet(); | 79 fRequiredScripts = new ScriptSet(); |
| 87 fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status); | 80 fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status); |
| 88 uhash_setKeyDeleter(fScriptSetSet, uhash_deleteScriptSet); | 81 uhash_setKeyDeleter(fScriptSetSet, uhash_deleteScriptSet); |
| 89 fCommonAmongAlternates = new ScriptSet(); | 82 fCommonAmongAlternates = new ScriptSet(); |
| 90 fNumerics = new UnicodeSet(); | 83 fNumerics = new UnicodeSet(); |
| 91 fIdentifierProfile = new UnicodeSet(0, 0x10FFFF); | 84 fIdentifierProfile = new UnicodeSet(0, 0x10FFFF); |
| 92 | 85 |
| 93 if (U_SUCCESS(status) && (fIdentifier == NULL || fRequiredScripts == NULL || fScriptSetSet == NULL || | 86 if (U_SUCCESS(status) && (fIdentifier == NULL || fRequiredScripts == NULL || fScriptSetSet == NULL || |
| 94 fCommonAmongAlternates == NULL || fNumerics == NULL || fIdentifierProfile == NULL)) { | 87 fCommonAmongAlternates == NULL || fNumerics == NULL || fIdentifierProfile == NULL)) { |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 135 ScriptSet scriptsForCP; | 128 ScriptSet scriptsForCP; |
| 136 UChar32 cp; | 129 UChar32 cp; |
| 137 for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) { | 130 for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) { |
| 138 cp = identifier.char32At(i); | 131 cp = identifier.char32At(i); |
| 139 // Store a representative character for each kind of decimal digit | 132 // Store a representative character for each kind of decimal digit |
| 140 if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) { | 133 if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) { |
| 141 // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value | 134 // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value |
| 142 fNumerics->add(cp - (UChar32)u_getNumericValue(cp)); | 135 fNumerics->add(cp - (UChar32)u_getNumericValue(cp)); |
| 143 } | 136 } |
| 144 UScriptCode extensions[500]; | 137 UScriptCode extensions[500]; |
| 145 int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, LENGTHOF(extensions), &status); | 138 int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, UPRV_LENGTHOF(extensions), &status); |
| 146 if (U_FAILURE(status)) { | 139 if (U_FAILURE(status)) { |
| 147 return *this; | 140 return *this; |
| 148 } | 141 } |
| 149 scriptsForCP.resetAll(); | 142 scriptsForCP.resetAll(); |
| 150 for (int32_t j=0; j<extensionsCount; j++) { | 143 for (int32_t j=0; j<extensionsCount; j++) { |
| 151 scriptsForCP.set(extensions[j], status); | 144 scriptsForCP.set(extensions[j], status); |
| 152 } | 145 } |
| 153 scriptsForCP.reset(USCRIPT_COMMON, status); | 146 scriptsForCP.reset(USCRIPT_COMMON, status); |
| 154 scriptsForCP.reset(USCRIPT_INHERITED, status); | 147 scriptsForCP.reset(USCRIPT_INHERITED, status); |
| 155 switch (scriptsForCP.countMembers()) { | 148 switch (scriptsForCP.countMembers()) { |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 240 // This is a bit tricky. We look at a number of factors. | 233 // This is a bit tricky. We look at a number of factors. |
| 241 // The number of scripts in the text. | 234 // The number of scripts in the text. |
| 242 // Plus 1 if there is some commonality among the alternates (eg [Arab Thaa]; [Arab Syrc]) | 235 // Plus 1 if there is some commonality among the alternates (eg [Arab Thaa]; [Arab Syrc]) |
| 243 // Plus number of alternates otherwise (this only works because we only test cardinality up to 2.) | 236 // Plus number of alternates otherwise (this only works because we only test cardinality up to 2.) |
| 244 | 237 |
| 245 // Note: the requiredScripts set omits COMMON and INHERITED; they are taken out at the | 238 // Note: the requiredScripts set omits COMMON and INHERITED; they are taken out at the |
| 246 // time it is created, in setIdentifier(). | 239 // time it is created, in setIdentifier(). |
| 247 int32_t cardinalityPlus = fRequiredScripts->countMembers() + | 240 int32_t cardinalityPlus = fRequiredScripts->countMembers() + |
| 248 (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1); | 241 (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1); |
| 249 if (cardinalityPlus < 2) { | 242 if (cardinalityPlus < 2) { |
| 250 return USPOOF_HIGHLY_RESTRICTIVE; | 243 return USPOOF_SINGLE_SCRIPT_RESTRICTIVE; |
| 251 } | 244 } |
| 252 if (containsWithAlternates(*JAPANESE, *fRequiredScripts) || containsWithAlternates(*CHINESE, *fRequiredScripts) | 245 if (containsWithAlternates(*JAPANESE, *fRequiredScripts) || containsWithAlternates(*CHINESE, *fRequiredScripts) |
| 253 || containsWithAlternates(*KOREAN, *fRequiredScripts)) { | 246 || containsWithAlternates(*KOREAN, *fRequiredScripts)) { |
| 254 return USPOOF_HIGHLY_RESTRICTIVE; | 247 return USPOOF_HIGHLY_RESTRICTIVE; |
| 255 } | 248 } |
| 256 if (cardinalityPlus == 2 && | 249 if (cardinalityPlus == 2 && |
| 257 fRequiredScripts->test(USCRIPT_LATIN, status) && | 250 fRequiredScripts->test(USCRIPT_LATIN, status) && |
| 258 !fRequiredScripts->intersects(*CONFUSABLE_WITH_LATIN)) { | 251 !fRequiredScripts->intersects(*CONFUSABLE_WITH_LATIN)) { |
| 259 return USPOOF_MODERATELY_RESTRICTIVE; | 252 return USPOOF_MODERATELY_RESTRICTIVE; |
| 260 } | 253 } |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 309 dest.append(separator); | 302 dest.append(separator); |
| 310 } | 303 } |
| 311 ScriptSet *ss = static_cast<ScriptSet *>(sorted.elementAt(i)); | 304 ScriptSet *ss = static_cast<ScriptSet *>(sorted.elementAt(i)); |
| 312 ss->displayScripts(dest); | 305 ss->displayScripts(dest); |
| 313 } | 306 } |
| 314 return dest; | 307 return dest; |
| 315 } | 308 } |
| 316 | 309 |
| 317 U_NAMESPACE_END | 310 U_NAMESPACE_END |
| 318 | 311 |
| OLD | NEW |