OLD | NEW |
(Empty) | |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are |
| 4 // met: |
| 5 // |
| 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided |
| 11 // with the distribution. |
| 12 // * Neither the name of Google Inc. nor the names of its |
| 13 // contributors may be used to endorse or promote products derived |
| 14 // from this software without specific prior written permission. |
| 15 // |
| 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 |
| 28 // TODO(cira): Remove LanguageMatcher from v8 when ICU implements |
| 29 // language matching API. |
| 30 |
| 31 #include "language-matcher.h" |
| 32 |
| 33 #include "i18n-utils.h" |
| 34 #include "unicode/datefmt.h" // For getAvailableLocales |
| 35 #include "unicode/locid.h" |
| 36 #include "unicode/uloc.h" |
| 37 #include "utils.h" |
| 38 |
| 39 namespace v8 { |
| 40 namespace internal { |
| 41 |
| 42 const unsigned int LanguageMatcher::kLanguageWeight = 75; |
| 43 const unsigned int LanguageMatcher::kScriptWeight = 20; |
| 44 const unsigned int LanguageMatcher::kRegionWeight = 5; |
| 45 const unsigned int LanguageMatcher::kThreshold = 50; |
| 46 const unsigned int LanguageMatcher::kPositionBonus = 1; |
| 47 const char* const LanguageMatcher::kDefaultLocale = "root"; |
| 48 |
| 49 static const char* GetLanguageException(const char*); |
| 50 static bool BCP47ToICUFormat(const char*, char*); |
| 51 static int CompareLocaleSubtags(const char*, const char*); |
| 52 static bool BuildLocaleName(const char*, const char*, LocaleIDMatch*); |
| 53 |
| 54 LocaleIDMatch::LocaleIDMatch() |
| 55 : score(-1) { |
| 56 I18NUtils::StrNCopy( |
| 57 bcp47_id, ULOC_FULLNAME_CAPACITY, LanguageMatcher::kDefaultLocale); |
| 58 |
| 59 I18NUtils::StrNCopy( |
| 60 icu_id, ULOC_FULLNAME_CAPACITY, LanguageMatcher::kDefaultLocale); |
| 61 } |
| 62 |
| 63 LocaleIDMatch& LocaleIDMatch::operator=(const LocaleIDMatch& rhs) { |
| 64 I18NUtils::StrNCopy(this->bcp47_id, ULOC_FULLNAME_CAPACITY, rhs.bcp47_id); |
| 65 I18NUtils::StrNCopy(this->icu_id, ULOC_FULLNAME_CAPACITY, rhs.icu_id); |
| 66 this->score = rhs.score; |
| 67 |
| 68 return *this; |
| 69 } |
| 70 |
| 71 // static |
| 72 void LanguageMatcher::GetBestMatchForPriorityList( |
| 73 v8::Handle<v8::Array> locales, LocaleIDMatch* result) { |
| 74 v8::HandleScope handle_scope; |
| 75 |
| 76 unsigned int position_bonus = locales->Length() * kPositionBonus; |
| 77 |
| 78 int max_score = 0; |
| 79 LocaleIDMatch match; |
| 80 for (unsigned int i = 0; i < locales->Length(); ++i) { |
| 81 position_bonus -= kPositionBonus; |
| 82 |
| 83 v8::TryCatch try_catch; |
| 84 v8::Local<v8::Value> locale_id = locales->Get(v8::Integer::New(i)); |
| 85 |
| 86 // Return default if exception is raised when reading parameter. |
| 87 if (try_catch.HasCaught()) break; |
| 88 |
| 89 // JavaScript arrays can be heterogenous so check each item |
| 90 // if it's a string. |
| 91 if (!locale_id->IsString()) continue; |
| 92 |
| 93 if (!CompareToSupportedLocaleIDList(locale_id->ToString(), &match)) { |
| 94 continue; |
| 95 } |
| 96 |
| 97 // Skip items under threshold. |
| 98 if (match.score < kThreshold) continue; |
| 99 |
| 100 match.score += position_bonus; |
| 101 if (match.score > max_score) { |
| 102 *result = match; |
| 103 |
| 104 max_score = match.score; |
| 105 } |
| 106 } |
| 107 } |
| 108 |
| 109 // static |
| 110 void LanguageMatcher::GetBestMatchForString( |
| 111 v8::Handle<v8::String> locale, LocaleIDMatch* result) { |
| 112 LocaleIDMatch match; |
| 113 |
| 114 if (CompareToSupportedLocaleIDList(locale, &match) && |
| 115 match.score >= kThreshold) { |
| 116 *result = match; |
| 117 } |
| 118 } |
| 119 |
| 120 // static |
| 121 bool LanguageMatcher::CompareToSupportedLocaleIDList( |
| 122 v8::Handle<v8::String> locale_id, LocaleIDMatch* result) { |
| 123 static int32_t available_count = 0; |
| 124 // Depending on how ICU data is built, locales returned by |
| 125 // Locale::getAvailableLocale() are not guaranteed to support DateFormat, |
| 126 // Collation and other services. We can call getAvailableLocale() of all the |
| 127 // services we want to support and take the intersection of them all, but |
| 128 // using DateFormat::getAvailableLocales() should suffice. |
| 129 // TODO(cira): Maybe make this thread-safe? |
| 130 static const icu::Locale* available_locales = |
| 131 icu::DateFormat::getAvailableLocales(available_count); |
| 132 |
| 133 // Skip this locale_id if it's not in ASCII. |
| 134 static LocaleIDMatch default_match; |
| 135 v8::String::AsciiValue ascii_value(locale_id); |
| 136 if (*ascii_value == NULL) return false; |
| 137 |
| 138 char locale[ULOC_FULLNAME_CAPACITY]; |
| 139 if (!BCP47ToICUFormat(*ascii_value, locale)) return false; |
| 140 |
| 141 icu::Locale input_locale(locale); |
| 142 |
| 143 // Position of the best match locale in list of available locales. |
| 144 int position = -1; |
| 145 const char* language = GetLanguageException(input_locale.getLanguage()); |
| 146 const char* script = input_locale.getScript(); |
| 147 const char* region = input_locale.getCountry(); |
| 148 for (int32_t i = 0; i < available_count; ++i) { |
| 149 int current_score = 0; |
| 150 int sign = |
| 151 CompareLocaleSubtags(language, available_locales[i].getLanguage()); |
| 152 current_score += sign * kLanguageWeight; |
| 153 |
| 154 sign = CompareLocaleSubtags(script, available_locales[i].getScript()); |
| 155 current_score += sign * kScriptWeight; |
| 156 |
| 157 sign = CompareLocaleSubtags(region, available_locales[i].getCountry()); |
| 158 current_score += sign * kRegionWeight; |
| 159 |
| 160 if (current_score >= kThreshold && current_score > result->score) { |
| 161 result->score = current_score; |
| 162 position = i; |
| 163 } |
| 164 } |
| 165 |
| 166 // Didn't find any good matches so use defaults. |
| 167 if (position == -1) return false; |
| 168 |
| 169 return BuildLocaleName(available_locales[position].getBaseName(), |
| 170 input_locale.getName(), result); |
| 171 } |
| 172 |
// Maps a few language subtags onto a related language, because falling back
// to the related language is preferable to falling back to the default.
// Any other |language| is returned unchanged (the same pointer).
static const char* GetLanguageException(const char* language) {
  static const struct {
    const char* from;
    const char* to;
  } kFallbacks[] = {
    { "sh", "sr" },   // Serbo-Croatian to Serbian.
    { "no", "nb" },   // Norwegian to Norwegian Bokmal.
    { "mo", "ro" },   // Moldavian to Romanian.
    { "tl", "fil" },  // Tagalog to Filipino.
  };

  const size_t count = sizeof(kFallbacks) / sizeof(kFallbacks[0]);
  for (size_t i = 0; i < count; ++i) {
    if (strcmp(language, kFallbacks[i].from) == 0) return kFallbacks[i].to;
  }
  return language;
}
| 190 |
| 191 // Converts user input from BCP47 locale id format to ICU compatible format. |
| 192 // Returns false if uloc_forLanguageTag call fails or if extension is too long. |
| 193 static bool BCP47ToICUFormat(const char* locale_id, char* result) { |
| 194 UErrorCode status = U_ZERO_ERROR; |
| 195 int32_t locale_size = 0; |
| 196 |
| 197 char locale[ULOC_FULLNAME_CAPACITY]; |
| 198 I18NUtils::StrNCopy(locale, ULOC_FULLNAME_CAPACITY, locale_id); |
| 199 |
| 200 // uloc_forLanguageTag has a bug where long extension can crash the code. |
| 201 // We need to check if extension part of language id conforms to the length. |
| 202 // ICU bug: http://bugs.icu-project.org/trac/ticket/8519 |
| 203 const char* extension = strstr(locale_id, "-u-"); |
| 204 if (extension != NULL && |
| 205 strlen(extension) > ULOC_KEYWORD_AND_VALUES_CAPACITY) { |
| 206 // Truncate to get non-crashing string, but still preserve base language. |
| 207 int base_length = strlen(locale_id) - strlen(extension); |
| 208 locale[base_length] = '\0'; |
| 209 } |
| 210 |
| 211 uloc_forLanguageTag(locale, result, ULOC_FULLNAME_CAPACITY, |
| 212 &locale_size, &status); |
| 213 return !U_FAILURE(status); |
| 214 } |
| 215 |
// Compares two locale id subtags for exact (case-sensitive) equality.
// Returns 1 if they are equal and -1 if they differ.
static int CompareLocaleSubtags(const char* lsubtag, const char* rsubtag) {
  if (strcmp(lsubtag, rsubtag) != 0) return -1;
  return 1;
}
| 221 |
| 222 // Builds a BCP47 compliant locale id from base name of matched locale and |
| 223 // full user specified locale. |
| 224 // Returns false if uloc_toLanguageTag failed to convert locale id. |
| 225 // Example: |
| 226 // base_name of matched locale (ICU ID): de_DE |
| 227 // input_locale_name (ICU ID): de_AT@collation=phonebk |
| 228 // result (ICU ID): de_DE@collation=phonebk |
| 229 // result (BCP47 ID): de-DE-u-co-phonebk |
| 230 static bool BuildLocaleName(const char* base_name, |
| 231 const char* input_locale_name, |
| 232 LocaleIDMatch* result) { |
| 233 I18NUtils::StrNCopy(result->icu_id, ULOC_LANG_CAPACITY, base_name); |
| 234 |
| 235 // Get extensions (if any) from the original locale. |
| 236 const char* extension = strchr(input_locale_name, ULOC_KEYWORD_SEPARATOR); |
| 237 if (extension != NULL) { |
| 238 I18NUtils::StrNCopy(result->icu_id + strlen(base_name), |
| 239 ULOC_KEYWORD_AND_VALUES_CAPACITY, extension); |
| 240 } else { |
| 241 I18NUtils::StrNCopy(result->icu_id, ULOC_LANG_CAPACITY, base_name); |
| 242 } |
| 243 |
| 244 // Convert ICU locale name into BCP47 format. |
| 245 UErrorCode status = U_ZERO_ERROR; |
| 246 uloc_toLanguageTag(result->icu_id, result->bcp47_id, |
| 247 ULOC_FULLNAME_CAPACITY, false, &status); |
| 248 return !U_FAILURE(status); |
| 249 } |
| 250 |
| 251 } } // namespace v8::internal |
OLD | NEW |