Chromium Code Reviews| Index: src/extensions/experimental/language-matcher.cc |
| =================================================================== |
| --- src/extensions/experimental/language-matcher.cc (revision 0) |
| +++ src/extensions/experimental/language-matcher.cc (revision 0) |
| @@ -0,0 +1,210 @@ |
| +// Copyright 2011 the V8 project authors. All rights reserved. |
| +// Redistribution and use in source and binary forms, with or without |
| +// modification, are permitted provided that the following conditions are |
| +// met: |
| +// |
| +// * Redistributions of source code must retain the above copyright |
| +// notice, this list of conditions and the following disclaimer. |
| +// * Redistributions in binary form must reproduce the above |
| +// copyright notice, this list of conditions and the following |
| +// disclaimer in the documentation and/or other materials provided |
| +// with the distribution. |
| +// * Neither the name of Google Inc. nor the names of its |
| +// contributors may be used to endorse or promote products derived |
| +// from this software without specific prior written permission. |
| +// |
| +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| + |
| +// TODO(cira): Remove LanguageMatcher from v8 when ICU implements |
| +// language matching API. |
| + |
| +#include "language-matcher.h" |
| + |
| +#include "unicode/locid.h" |
| +#include "unicode/uloc.h" |
| +#include "utils.h" // For Min/Max |
| + |
| +namespace v8 { |
| +namespace internal { |
| + |
| +// Per-subtag weights used when ranking a candidate locale: a matching |
| +// subtag adds its weight to the rank and a mismatching subtag subtracts |
| +// it (see CompareToSupportedLocaleIDList). Despite the "Distance" suffix, |
| +// a larger resulting rank means a better match. |
| +const unsigned int LanguageMatcher::kLanguageDistance = 75; |
|
jungshik at Google
2011/05/02 19:57:23
nit: again, this is not a distance but a score/wei
Nebojša Ćirić
2011/05/02 22:44:01
Done.
|
| +const unsigned int LanguageMatcher::kScriptDistance = 20; |
| +const unsigned int LanguageMatcher::kRegionDistance = 5; |
| +// Minimum rank a candidate must reach to be accepted as a match. |
| +const unsigned int LanguageMatcher::kThreshold = 50; |
| +// Per-position bonus that favors earlier entries of a priority list. |
| +const unsigned int LanguageMatcher::kPositionBonus = 1; |
| +// Locale id a default-constructed LocaleIDMatch refers to. |
| +const char* const LanguageMatcher::kDefaultLocale = "root"; |
| + |
| +// Forward declarations of the file-local helpers defined further below. |
| +static const char* GetLanguageException(const char* language); |
| +static void BCP47ToICUFormat(v8::Handle<v8::String> locale_id, |
| +                             char* locale); |
| +static int CompareLocaleSubtags(const char* lsubtag, const char* rsubtag); |
| +static void BuildLocaleName(const char* base_name, |
| +                            const char* user_locale_name, |
| +                            LocaleIDMatch* result); |
| + |
| +// Creates a match that refers to the default locale in both the BCP47 and |
| +// the ICU representation and carries a rank of -1. |
| +LocaleIDMatch::LocaleIDMatch() : rank(-1) { |
| +  const char* const default_id = LanguageMatcher::kDefaultLocale; |
| +  snprintf(bcp47_locale, ULOC_FULLNAME_CAPACITY, "%s", default_id); |
| +  snprintf(icu_locale, ULOC_FULLNAME_CAPACITY, "%s", default_id); |
| +} |
| + |
| +// Copies both locale ids and the rank from |rhs|. |
| +// Returns a reference to this object so assignments can be chained. |
| +LocaleIDMatch& LocaleIDMatch::operator=(const LocaleIDMatch& rhs) { |
| +  // snprintf must not be given overlapping source and destination |
| +  // buffers, so self-assignment is a no-op. |
| +  if (this != &rhs) { |
| +    snprintf(this->bcp47_locale, ULOC_FULLNAME_CAPACITY, |
| +             "%s", rhs.bcp47_locale); |
| +    snprintf(this->icu_locale, ULOC_FULLNAME_CAPACITY, |
| +             "%s", rhs.icu_locale); |
| +    this->rank = rhs.rank; |
| +  } |
| +  // Required: flowing off the end of a non-void function is undefined |
| +  // behavior. |
| +  return *this; |
| +} |
| + |
| +// static |
| +// Picks the best supported locale for a priority-ordered array of locale |
| +// ids. Non-string entries are skipped and entries ranked below kThreshold |
| +// are ignored. Earlier entries receive a larger position bonus. |result| |
| +// is written only when some entry qualifies; otherwise it is left |
| +// untouched. Scanning stops if reading an array element throws. |
| +void LanguageMatcher::GetBestMatchForPriorityList( |
| +    v8::Handle<v8::Array> locales, LocaleIDMatch* result) { |
| +  v8::HandleScope handle_scope; |
| + |
| +  // A rejected candidate carries the default rank of -1. Ranks are |
| +  // compared as signed values here: against the unsigned kThreshold, -1 |
| +  // would be converted to a huge unsigned value and slip past the check. |
| +  // NOTE(review): assumes LocaleIDMatch::rank is a signed integer, as its |
| +  // -1 initializer suggests -- confirm in the header. |
| +  const int threshold = static_cast<int>(kThreshold); |
| +  const int bonus_step = static_cast<int>(kPositionBonus); |
| +  int position_bonus = static_cast<int>(locales->Length()) * bonus_step; |
| + |
| +  int max_rank = 0; |
| +  LocaleIDMatch match; |
| +  for (unsigned int i = 0; i < locales->Length(); ++i) { |
| +    position_bonus -= bonus_step; |
| + |
| +    v8::TryCatch try_catch; |
| +    v8::Local<v8::Value> locale_id = locales->Get(v8::Integer::New(i)); |
| + |
| +    // Return default if exception is raised when reading parameter. |
| +    if (try_catch.HasCaught()) break; |
| + |
| +    // JavaScript arrays can be heterogeneous, so check that each item is |
| +    // a string. |
| +    if (!locale_id->IsString()) continue; |
| + |
| +    CompareToSupportedLocaleIDList(locale_id->ToString(), &match); |
| + |
| +    // Skip items under threshold. |
| +    if (match.rank < threshold) continue; |
| + |
| +    match.rank += position_bonus; |
| +    if (match.rank > max_rank) { |
| +      *result = match; |
| +      max_rank = match.rank; |
| +    } |
| +  } |
| +} |
| + |
| +// static |
| +// Finds the best supported locale for a single locale id string. |
| +// |result| is overwritten only when the match reaches kThreshold; |
| +// otherwise it is left untouched. |
| +void LanguageMatcher::GetBestMatchForString( |
| +    v8::Handle<v8::String> locale, LocaleIDMatch* result) { |
| +  LocaleIDMatch match; |
| +  CompareToSupportedLocaleIDList(locale, &match); |
| + |
| +  // Compare as signed values: a rejected match has rank -1, which would |
| +  // turn into a huge value if converted to the unsigned type of |
| +  // kThreshold and would then wrongly pass this check. |
| +  // NOTE(review): assumes LocaleIDMatch::rank is a signed integer, as its |
| +  // -1 initializer suggests -- confirm in the header. |
| +  if (match.rank >= static_cast<int>(kThreshold)) { |
| +    *result = match; |
| +  } |
| +} |
| + |
| +// static |
| +// Ranks |locale_id| against every locale ICU reports as available and |
| +// stores the best candidate in |result|. Each of the language, script and |
| +// region subtags adds its weight on a match and subtracts it on a |
| +// mismatch. If no candidate reaches kThreshold, |result| is reset to a |
| +// default-constructed match. |
| +void LanguageMatcher::CompareToSupportedLocaleIDList( |
| +    v8::Handle<v8::String> locale_id, LocaleIDMatch* result) { |
| +  // TODO: confirm that this first-use initialization is safe if the |
| +  // function can be reached from more than one thread. |
| +  static int32_t available_count = 0; |
| +  static const icu::Locale* available_locales = |
| +      icu::Locale::getAvailableLocales(available_count); |
|
jungshik at Google
2011/05/02 19:57:23
Don't we need any lock around here?
More importan
Nebojša Ćirić
2011/05/02 22:44:01
Added a TODO, but I don't think it's an issue. One
|
| + |
| +  char locale[ULOC_FULLNAME_CAPACITY]; |
| +  BCP47ToICUFormat(locale_id, locale); |
| +  icu::Locale user_locale(locale); |
|
jungshik at Google
2011/05/02 19:57:23
how about input_locale?
Nebojša Ćirić
2011/05/02 22:44:01
Done.
|
| + |
| +  // Use signed weights: the subtag comparison yields -1 on a mismatch, |
| +  // and multiplying that by an unsigned constant would wrap around |
| +  // instead of producing a negative contribution. |
| +  const int language_weight = static_cast<int>(kLanguageDistance); |
| +  const int script_weight = static_cast<int>(kScriptDistance); |
| +  const int region_weight = static_cast<int>(kRegionDistance); |
| +  const int threshold = static_cast<int>(kThreshold); |
| + |
| +  // Position of the best match locale in list of available locales. |
| +  int position = -1; |
| +  result->rank = 0; |
| +  const char* language = GetLanguageException(user_locale.getLanguage()); |
| +  const char* script = user_locale.getScript(); |
| +  const char* region = user_locale.getCountry(); |
| +  for (int32_t i = 0; i < available_count; ++i) { |
| +    const icu::Locale& candidate = available_locales[i]; |
| + |
| +    int current_rank = |
| +        CompareLocaleSubtags(language, candidate.getLanguage()) * |
| +        language_weight; |
| +    current_rank += |
| +        CompareLocaleSubtags(script, candidate.getScript()) * |
| +        script_weight; |
| +    current_rank += |
| +        CompareLocaleSubtags(region, candidate.getCountry()) * |
| +        region_weight; |
| + |
| +    // Only a strictly better rank replaces the current best, so the first |
| +    // of several equally ranked candidates wins. |
| +    if (current_rank > result->rank) { |
| +      result->rank = current_rank; |
| +      position = i; |
| +    } |
| +  } |
| + |
| +  if (position == -1 || result->rank < threshold) { |
| +    LocaleIDMatch default_match; |
| +    *result = default_match; |
| +    return; |
| +  } |
| + |
| +  BuildLocaleName(available_locales[position].getBaseName(), |
| +                  user_locale.getName(), result); |
| +} |
| + |
| +// Maps selected unsupported language subtags to a closely related |
| +// supported language, which is a better fallback than the default locale. |
| +// Returns |language| itself when there is no exception for it. |
| +static const char* GetLanguageException(const char* language) { |
| +  static const struct { |
| +    const char* unsupported; |
| +    const char* replacement; |
| +  } kExceptions[] = { |
| +    { "sh", "sr" },   // Serbo-Croatian => Serbian |
| +    { "no", "nb" },   // Norwegian => Norwegian Bokmal |
| +    { "mo", "ro" },   // Moldavian => Romanian |
| +    { "tl", "fil" },  // Tagalog => Filipino |
| +  }; |
|
jungshik at Google
2011/05/02 19:57:23
Comment these entries? e.g. Tagalog => Filipino
Nebojša Ćirić
2011/05/02 22:44:01
Done.
|
| + |
| +  const size_t count = sizeof(kExceptions) / sizeof(kExceptions[0]); |
| +  for (size_t i = 0; i < count; ++i) { |
| +    if (strcmp(language, kExceptions[i].unsupported) == 0) { |
| +      return kExceptions[i].replacement; |
| +    } |
| +  } |
| +  return language; |
| +} |
| + |
| +// Converts user input from BCP47 locale id format to ICU compatible format. |
| +// |locale| must point to a buffer of at least ULOC_FULLNAME_CAPACITY |
| +// chars. If the input cannot be converted, or the converted id does not |
| +// fit into the buffer, |locale| is set to the empty string so that callers |
| +// never read an uninitialized or unterminated buffer. |
| +static void BCP47ToICUFormat(v8::Handle<v8::String> locale_id, char* locale) { |
| +  v8::String::Utf8Value utf8_locale_id(locale_id); |
|
jungshik at Google
2011/05/02 19:57:23
ASCIIValue?
Nebojša Ćirić
2011/05/02 22:44:01
Already done in parent method.
On 2011/05/02 19:5
|
| +  // Utf8Value yields NULL when the string conversion itself failed. |
| +  if (*utf8_locale_id == NULL) { |
| +    locale[0] = '\0'; |
| +    return; |
| +  } |
| + |
| +  UErrorCode status = U_ZERO_ERROR; |
| +  int32_t parsed_length = 0; |
| +  uloc_forLanguageTag(*utf8_locale_id, locale, ULOC_FULLNAME_CAPACITY, |
| +                      &parsed_length, &status); |
| + |
| +  // U_FAILURE does not cover warnings, so the "result filled the buffer |
| +  // without room for the terminator" case is checked explicitly. |
| +  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) { |
| +    locale[0] = '\0'; |
| +  } |
| +} |
| + |
| +// Compares two locale id subtags for exact, case-sensitive equality. |
| +// Returns 1 for match or -1 for mismatch. |
|
jungshik at Google
2011/05/02 19:57:23
nit: "or-1" -> "or -1"
Nebojša Ćirić
2011/05/02 22:44:01
Done.
|
| +static int CompareLocaleSubtags(const char* lsubtag, const char* rsubtag) { |
| +  if (strcmp(lsubtag, rsubtag) != 0) return -1; |
| +  return 1; |
| +} |
| + |
| +// Builds a BCP47 compliant locale id from base name of matched locale and |
| +// full user specified locale. |
| +// The keyword part of |user_locale_name| (everything starting at |
| +// ULOC_KEYWORD_SEPARATOR), if any, is appended to |base_name| to form |
| +// result->icu_locale, which is then converted into result->bcp47_locale. |
| +// If that conversion fails or its output does not fit, |result| is reset |
| +// to the default locale with rank -1. |
| +// Example: |
| +// base_name: de_DE |
|
jungshik at Google
2011/05/02 19:57:23
base_name of the matched locale (ICU ID): de_DE
Nebojša Ćirić
2011/05/02 22:44:01
Done.
|
| +// user_locale_name: de_AT@collation=phonebk |
|
jungshik at Google
2011/05/02 19:57:23
input_locale_name (ICU ID)
Nebojša Ćirić
2011/05/02 22:44:01
Done.
|
| +// ICU compatible result: de_DE@collation=phonebk |
|
jungshik at Google
2011/05/02 19:57:23
result (ICU ID)
Nebojša Ćirić
2011/05/02 22:44:01
Done.
|
| +// BCP47 compatible result: de-DE-u-co-phonebk |
|
jungshik at Google
2011/05/02 19:57:23
result (BCP 47)
Nebojša Ćirić
2011/05/02 22:44:01
Done.
|
| +static void BuildLocaleName(const char* base_name, |
| +                            const char* user_locale_name, |
| +                            LocaleIDMatch* result) { |
| +  // Get extensions (if any) from the original locale. |
| +  const char* extension = strchr(user_locale_name, ULOC_KEYWORD_SEPARATOR); |
| +  snprintf(result->icu_locale, ULOC_FULLNAME_CAPACITY, "%s%s", |
| +           base_name, extension != NULL ? extension : ""); |
| + |
| +  // Convert ICU locale name into BCP47 format. |
| +  UErrorCode status = U_ZERO_ERROR; |
| +  uloc_toLanguageTag(result->icu_locale, result->bcp47_locale, |
| +                     ULOC_FULLNAME_CAPACITY, false, &status); |
|
jungshik at Google
2011/05/02 19:57:23
Instead of ignoring status, either assert U_SUCCES
Nebojša Ćirić
2011/05/02 22:44:01
Done.
|
| +  // Do not hand out a half-written BCP47 id: on an error, or when the |
| +  // output filled the buffer without a terminator (a warning that |
| +  // U_FAILURE does not report), fall back to the default match. |
| +  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) { |
| +    snprintf(result->bcp47_locale, ULOC_FULLNAME_CAPACITY, |
| +             "%s", LanguageMatcher::kDefaultLocale); |
| +    snprintf(result->icu_locale, ULOC_FULLNAME_CAPACITY, |
| +             "%s", LanguageMatcher::kDefaultLocale); |
| +    result->rank = -1; |
| +  } |
| +} |
| + |
| +} } // namespace v8::internal |
| Property changes on: src/extensions/experimental/language-matcher.cc |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| + LF |