Chromium Code Reviews
|
| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | |
| 2 // Redistribution and use in source and binary forms, with or without | |
| 3 // modification, are permitted provided that the following conditions are | |
| 4 // met: | |
| 5 // | |
| 6 // * Redistributions of source code must retain the above copyright | |
| 7 // notice, this list of conditions and the following disclaimer. | |
| 8 // * Redistributions in binary form must reproduce the above | |
| 9 // copyright notice, this list of conditions and the following | |
| 10 // disclaimer in the documentation and/or other materials provided | |
| 11 // with the distribution. | |
| 12 // * Neither the name of Google Inc. nor the names of its | |
| 13 // contributors may be used to endorse or promote products derived | |
| 14 // from this software without specific prior written permission. | |
| 15 // | |
| 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 27 | |
| 28 // TODO(cira): Remove LanguageMatcher from v8 when ICU implements | |
| 29 // language matching API. | |
| 30 | |
| 31 #include "language-matcher.h" | |
| 32 | |
| 33 #include "platform.h" | |
| 34 #include "unicode/datefmt.h" // For getAvailableLocales | |
| 35 #include "unicode/locid.h" | |
| 36 #include "unicode/uloc.h" | |
| 37 #include "utils.h" | |
| 38 | |
| 39 namespace v8 { | |
| 40 namespace internal { | |
| 41 | |
| 42 const unsigned int LanguageMatcher::kLanguageWeight = 75; | |
| 43 const unsigned int LanguageMatcher::kScriptWeight = 20; | |
| 44 const unsigned int LanguageMatcher::kRegionWeight = 5; | |
| 45 const unsigned int LanguageMatcher::kThreshold = 50; | |
| 46 const unsigned int LanguageMatcher::kPositionBonus = 1; | |
| 47 const char* const LanguageMatcher::kDefaultLocale = "root"; | |
| 48 | |
| 49 static const char* GetLanguageException(const char*); | |
| 50 static void BCP47ToICUFormat(v8::Handle<v8::String>, char*); | |
| 51 static int CompareLocaleSubtags(const char*, const char*); | |
| 52 static bool BuildLocaleName(const char*, const char*, LocaleIDMatch*); | |
| 53 | |
| 54 LocaleIDMatch::LocaleIDMatch() | |
| 55 : score(-1) { | |
| 56 OS::SNPrintF(Vector<char>(bcp47_id, ULOC_FULLNAME_CAPACITY), | |
| 57 "%s", LanguageMatcher::kDefaultLocale); | |
| 58 OS::SNPrintF(Vector<char>(icu_id, ULOC_FULLNAME_CAPACITY), | |
| 59 "%s", LanguageMatcher::kDefaultLocale); | |
| 60 } | |
| 61 | |
| 62 LocaleIDMatch& LocaleIDMatch::operator=(const LocaleIDMatch& rhs) { | |
| 63 OS::SNPrintF(Vector<char>(this->bcp47_id, ULOC_FULLNAME_CAPACITY), | |
| 64 "%s", rhs.bcp47_id); | |
| 65 OS::SNPrintF(Vector<char>(this->icu_id, ULOC_FULLNAME_CAPACITY), | |
| 66 "%s", rhs.icu_id); | |
| 67 this->score = rhs.score; | |
| 68 | |
| 69 return *this; | |
| 70 } | |
| 71 | |
| 72 // static | |
| 73 void LanguageMatcher::GetBestMatchForPriorityList( | |
| 74 v8::Handle<v8::Array> locales, LocaleIDMatch* result) { | |
| 75 v8::HandleScope handle_scope; | |
| 76 | |
| 77 unsigned int position_bonus = locales->Length() * kPositionBonus; | |
| 78 | |
| 79 int max_score = 0; | |
| 80 LocaleIDMatch match; | |
| 81 for (unsigned int i = 0; i < locales->Length(); ++i) { | |
| 82 position_bonus -= kPositionBonus; | |
| 83 | |
| 84 v8::TryCatch try_catch; | |
| 85 v8::Local<v8::Value> locale_id = locales->Get(v8::Integer::New(i)); | |
| 86 | |
| 87 // Return default if exception is raised when reading parameter. | |
| 88 if (try_catch.HasCaught()) break; | |
| 89 | |
| 90 // JavaScript arrays can be heterogeneous, so check that each item | |
| 91 // is a string. | |
| 92 if (!locale_id->IsString()) continue; | |
| 93 | |
| 94 CompareToSupportedLocaleIDList(locale_id->ToString(), &match); | |
|
jungshik at Google
2011/05/03 20:04:05
Perhaps, you can check the return value and skip i
Nebojša Ćirić
2011/05/03 20:39:00
Done.
| |
| 95 | |
| 96 // Skip items under threshold. | |
| 97 if (match.score < kThreshold) continue; | |
| 98 | |
| 99 match.score += position_bonus; | |
| 100 if (match.score > max_score) { | |
| 101 *result = match; | |
| 102 | |
| 103 max_score = match.score; | |
| 104 } | |
| 105 } | |
| 106 } | |
| 107 | |
| 108 // static | |
| 109 void LanguageMatcher::GetBestMatchForString( | |
| 110 v8::Handle<v8::String> locale, LocaleIDMatch* result) { | |
| 111 LocaleIDMatch match; | |
| 112 CompareToSupportedLocaleIDList(locale, &match); | |
| 113 | |
| 114 if (match.score >= kThreshold) { | |
| 115 *result = match; | |
| 116 } | |
|
jungshik at Google
2011/05/03 20:04:05
How about this?
if (CompareToSupportedLocaleIDLi
Nebojša Ćirić
2011/05/03 20:39:00
Done.
| |
| 117 } | |
| 118 | |
| 119 // static | |
| 120 void LanguageMatcher::CompareToSupportedLocaleIDList( | |
| 121 v8::Handle<v8::String> locale_id, LocaleIDMatch* result) { | |
| 122 static int32_t available_count = 0; | |
| 123 // Locale::getAvailableLocales would return 100+ locales, with more than 70 | |
| 124 // with little data. DateFormat variant returns well populated Locales only. | |
|
jungshik at Google
2011/05/03 20:04:05
The above comment is specific to Chrome's ICU buil
Nebojša Ćirić
2011/05/03 20:39:00
Done.
| |
| 125 // TODO(cira): Maybe make this thread-safe? | |
| 126 static const icu::Locale* available_locales = | |
| 127 icu::DateFormat::getAvailableLocales(available_count); | |
| 128 | |
| 129 // Skip this locale_id if it's not in ASCII. | |
| 130 static LocaleIDMatch default_match; | |
| 131 v8::String::AsciiValue is_ascii(locale_id); | |
| 132 if (*is_ascii == NULL) { | |
| 133 *result = default_match; | |
| 134 return; | |
| 135 } | |
| 136 | |
| 137 char locale[ULOC_FULLNAME_CAPACITY]; | |
| 138 BCP47ToICUFormat(locale_id, locale); | |
| 139 icu::Locale input_locale(locale); | |
| 140 | |
| 141 // Position of the best match locale in list of available locales. | |
| 142 int position = -1; | |
| 143 result->score = 0; | |
| 144 const char* language = GetLanguageException(input_locale.getLanguage()); | |
| 145 const char* script = input_locale.getScript(); | |
| 146 const char* region = input_locale.getCountry(); | |
| 147 for (int32_t i = 0; i < available_count; ++i) { | |
| 148 int current_score = 0; | |
| 149 int sign = | |
| 150 CompareLocaleSubtags(language, available_locales[i].getLanguage()); | |
| 151 current_score += sign * kLanguageWeight; | |
| 152 | |
| 153 sign = CompareLocaleSubtags(script, available_locales[i].getScript()); | |
| 154 current_score += sign * kScriptWeight; | |
| 155 | |
| 156 sign = CompareLocaleSubtags(region, available_locales[i].getCountry()); | |
| 157 current_score += sign * kRegionWeight; | |
| 158 | |
| 159 if (current_score > result->score) { | |
| 160 result->score = current_score; | |
| 161 position = i; | |
| 162 } | |
| 163 } | |
| 164 | |
| 165 if (result->score < kThreshold || position == -1) { | |
| 166 *result = default_match; | |
| 167 return; | |
| 168 } | |
| 169 | |
| 170 BuildLocaleName(available_locales[position].getBaseName(), | |
| 171 input_locale.getName(), result); | |
|
jungshik at Google
2011/05/03 20:04:05
If we just want to throw away the return value, th
Nebojša Ćirić
2011/05/03 20:39:00
Done.
| |
| 172 } | |
| 173 | |
| 174 // For some unsupported language subtags it is better to fall back to a | |
| 175 // related language that is supported than to the default locale. | |
| 176 static const char* GetLanguageException(const char* language) { | |
| 177 // Serbo-Croatian to Serbian. | |
| 178 if (!strcmp(language, "sh")) return "sr"; | |
| 179 | |
| 180 // Norwegian to Norwegian Bokmal. | |
| 181 if (!strcmp(language, "no")) return "nb"; | |
| 182 | |
| 183 // Moldavian to Romanian. | |
| 184 if (!strcmp(language, "mo")) return "ro"; | |
| 185 | |
| 186 // Tagalog to Filipino. | |
| 187 if (!strcmp(language, "tl")) return "fil"; | |
| 188 | |
| 189 return language; | |
| 190 } | |
| 191 | |
| 192 // Converts user input from BCP47 locale id format to ICU compatible format. | |
| 193 static void BCP47ToICUFormat(v8::Handle<v8::String> locale_id, char* locale) { | |
| 194 UErrorCode status = U_ZERO_ERROR; | |
| 195 int32_t locale_size = 0; | |
| 196 uloc_forLanguageTag(*v8::String::Utf8Value(locale_id), locale, | |
| 197 ULOC_FULLNAME_CAPACITY, &locale_size, &status); | |
| 198 } | |
| 199 | |
| 200 // Compares locale id subtags. | |
| 201 // Returns 1 for match or -1 for mismatch. | |
| 202 static int CompareLocaleSubtags(const char* lsubtag, const char* rsubtag) { | |
| 203 return strcmp(lsubtag, rsubtag) == 0 ? 1 : -1; | |
| 204 } | |
| 205 | |
| 206 // Builds a BCP47 compliant locale id from base name of matched locale and | |
| 207 // full user specified locale. | |
| 208 // Returns false if uloc_toLanguageTag failed to convert locale id. | |
| 209 // Example: | |
| 210 // base_name of matched locale (ICU ID): de_DE | |
| 211 // input_locale_name (ICU ID): de_AT@collation=phonebk | |
| 212 // result (ICU ID): de_DE@collation=phonebk | |
| 213 // result (BCP47 ID): de-DE-u-co-phonebk | |
| 214 static bool BuildLocaleName(const char* base_name, | |
| 215 const char* input_locale_name, | |
| 216 LocaleIDMatch* result) { | |
| 217 // Get extensions (if any) from the original locale. | |
| 218 const char* extension = strchr(input_locale_name, ULOC_KEYWORD_SEPARATOR); | |
| 219 if (extension != NULL) { | |
| 220 OS::SNPrintF(Vector<char>(result->icu_id, ULOC_FULLNAME_CAPACITY), | |
| 221 "%s%s", base_name, extension); | |
| 222 } else { | |
| 223 OS::SNPrintF(Vector<char>(result->icu_id, ULOC_FULLNAME_CAPACITY), | |
| 224 "%s", base_name); | |
| 225 } | |
| 226 | |
| 227 // Convert ICU locale name into BCP47 format. | |
| 228 UErrorCode status = U_ZERO_ERROR; | |
| 229 uloc_toLanguageTag(result->icu_id, result->bcp47_id, | |
| 230 ULOC_FULLNAME_CAPACITY, false, &status); | |
| 231 return !U_FAILURE(status); | |
| 232 } | |
| 233 | |
| 234 } } // namespace v8::internal | |
| OLD | NEW |