Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(640)

Unified Diff: src/extensions/experimental/language-matcher.cc

Issue 6967005: Add new files missing in previous commit. (Closed) Base URL: https://v8.googlecode.com/svn/trunk
Patch Set: Add missing new files from previous commit. Created 9 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/extensions/experimental/language-matcher.h ('k') | src/preparse-data-format.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/extensions/experimental/language-matcher.cc
diff --git a/src/extensions/experimental/language-matcher.cc b/src/extensions/experimental/language-matcher.cc
new file mode 100644
index 0000000000000000000000000000000000000000..385ebfffff85be52730fd3b43d81b2de0a58284d
--- /dev/null
+++ b/src/extensions/experimental/language-matcher.cc
@@ -0,0 +1,251 @@
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// TODO(cira): Remove LanguageMatcher from v8 when ICU implements
+// language matching API.
+
+#include "language-matcher.h"
+
+#include "i18n-utils.h"
+#include "unicode/datefmt.h" // For getAvailableLocales
+#include "unicode/locid.h"
+#include "unicode/uloc.h"
+#include "utils.h"
+
+namespace v8 {
+namespace internal {
+
+// Per-subtag scoring weights used when a requested locale is compared with a
+// supported one: a matching subtag adds its weight to the score, a mismatch
+// subtracts it. The three weights add up to 100.
+const unsigned int LanguageMatcher::kLanguageWeight = 75;
+const unsigned int LanguageMatcher::kScriptWeight = 20;
+const unsigned int LanguageMatcher::kRegionWeight = 5;
+// Minimum score a candidate has to reach to be accepted as a match.
+const unsigned int LanguageMatcher::kThreshold = 50;
+// Bonus unit used when ranking a priority list; entries closer to the front
+// of the list receive a larger multiple of it.
+const unsigned int LanguageMatcher::kPositionBonus = 1;
+// Locale id a LocaleIDMatch holds until a real match overwrites it.
+const char* const LanguageMatcher::kDefaultLocale = "root";
+
+// Forward declarations of the file-local helpers; their definitions follow
+// the LanguageMatcher methods below.
+static const char* GetLanguageException(const char*);
+static bool BCP47ToICUFormat(const char*, char*);
+static int CompareLocaleSubtags(const char*, const char*);
+static bool BuildLocaleName(const char*, const char*, LocaleIDMatch*);
+
+// Builds an "empty" match: both the ICU and the BCP47 id are set to
+// LanguageMatcher::kDefaultLocale and the score starts at -1.
+LocaleIDMatch::LocaleIDMatch() : score(-1) {
+  const char* const fallback_id = LanguageMatcher::kDefaultLocale;
+  I18NUtils::StrNCopy(icu_id, ULOC_FULLNAME_CAPACITY, fallback_id);
+  I18NUtils::StrNCopy(bcp47_id, ULOC_FULLNAME_CAPACITY, fallback_id);
+}
+
+// Copies both locale ids and the score from |rhs| and returns *this.
+// Self-assignment is a no-op: I18NUtils::StrNCopy is defined elsewhere, so
+// this code must not rely on it tolerating identical source and destination
+// buffers.
+LocaleIDMatch& LocaleIDMatch::operator=(const LocaleIDMatch& rhs) {
+  if (this == &rhs) return *this;
+
+  I18NUtils::StrNCopy(bcp47_id, ULOC_FULLNAME_CAPACITY, rhs.bcp47_id);
+  I18NUtils::StrNCopy(icu_id, ULOC_FULLNAME_CAPACITY, rhs.icu_id);
+  score = rhs.score;
+
+  return *this;
+}
+
+// static
+// Walks a prioritized list of requested locale ids and stores the best
+// supported match in |result|.
+//
+// Every string entry is matched against the supported locales. Entries that
+// score below kThreshold are ignored; the rest receive a position bonus
+// (larger for entries nearer the front) and the highest total wins.
+// |result| is left untouched when no entry qualifies.
+void LanguageMatcher::GetBestMatchForPriorityList(
+    v8::Handle<v8::Array> locales, LocaleIDMatch* result) {
+  v8::HandleScope handle_scope;
+
+  // Read the length once: the bonus below is derived from it, so the loop
+  // bound has to stay consistent with it or the unsigned bonus could wrap.
+  const unsigned int length = locales->Length();
+  // kThreshold is unsigned; compare scores as signed values.
+  const int threshold = static_cast<int>(kThreshold);
+
+  unsigned int position_bonus = length * kPositionBonus;
+  int max_score = 0;
+  for (unsigned int i = 0; i < length; ++i) {
+    position_bonus -= kPositionBonus;
+
+    v8::TryCatch try_catch;
+    v8::Local<v8::Value> locale_id = locales->Get(v8::Integer::New(i));
+
+    // Stop at the first element whose read throws; whatever was selected
+    // so far (possibly nothing) stays in |result|.
+    if (try_catch.HasCaught()) break;
+
+    // JavaScript arrays can be heterogeneous, so skip non-string items.
+    if (!locale_id->IsString()) continue;
+
+    // Use a fresh match for every entry. CompareToSupportedLocaleIDList only
+    // accepts candidates that beat the score already stored in its output,
+    // so reusing one object would make a later entry compete against the
+    // previous entry's bonus-inflated score.
+    LocaleIDMatch match;
+    if (!CompareToSupportedLocaleIDList(locale_id->ToString(), &match)) {
+      continue;
+    }
+
+    // Skip items under threshold.
+    if (match.score < threshold) continue;
+
+    match.score += position_bonus;
+    if (match.score > max_score) {
+      *result = match;
+      max_score = match.score;
+    }
+  }
+}
+
+// static
+// Matches a single requested locale id against the supported locales.
+// |result| is overwritten only when a match was found and its score is not
+// below kThreshold; otherwise it keeps whatever the caller put in it.
+void LanguageMatcher::GetBestMatchForString(
+    v8::Handle<v8::String> locale, LocaleIDMatch* result) {
+  LocaleIDMatch candidate;
+
+  if (!CompareToSupportedLocaleIDList(locale, &candidate)) return;
+  if (candidate.score < kThreshold) return;
+
+  *result = candidate;
+}
+
+// static
+// Scores |locale_id| against every locale reported by
+// icu::DateFormat::getAvailableLocales() and keeps the best one.
+//
+// For each available locale the language, script and region subtags are
+// compared; a match adds the subtag's weight and a mismatch subtracts it.
+// A candidate is accepted when its score is at least kThreshold and strictly
+// greater than the score already stored in |result|.
+//
+// Returns true when a candidate was accepted and its ids were written to
+// |result|. Returns false when |locale_id| cannot be read as ASCII, cannot
+// be converted to an ICU id, no candidate qualified, or the final BCP47
+// conversion failed.
+bool LanguageMatcher::CompareToSupportedLocaleIDList(
+    v8::Handle<v8::String> locale_id, LocaleIDMatch* result) {
+  static int32_t available_count = 0;
+  // Depending on how ICU data is built, locales returned by
+  // Locale::getAvailableLocale() are not guaranteed to support DateFormat,
+  // Collation and other services. We can call getAvailableLocale() of all the
+  // services we want to support and take the intersection of them all, but
+  // using DateFormat::getAvailableLocales() should suffice.
+  // TODO(cira): Maybe make this thread-safe?
+  static const icu::Locale* available_locales =
+      icu::DateFormat::getAvailableLocales(available_count);
+
+  // Skip this locale_id if it's not in ASCII.
+  v8::String::AsciiValue ascii_value(locale_id);
+  if (*ascii_value == NULL) return false;
+
+  char locale[ULOC_FULLNAME_CAPACITY];
+  if (!BCP47ToICUFormat(*ascii_value, locale)) return false;
+
+  icu::Locale input_locale(locale);
+
+  // The weights and the threshold are unsigned. Convert them once so that
+  // "sign * weight" and the threshold test are done in signed arithmetic;
+  // otherwise a negative score is converted to a huge unsigned value and
+  // passes the threshold comparison.
+  const int language_weight = static_cast<int>(kLanguageWeight);
+  const int script_weight = static_cast<int>(kScriptWeight);
+  const int region_weight = static_cast<int>(kRegionWeight);
+  const int threshold = static_cast<int>(kThreshold);
+
+  // Position of the best match locale in list of available locales.
+  int position = -1;
+  const char* language = GetLanguageException(input_locale.getLanguage());
+  const char* script = input_locale.getScript();
+  const char* region = input_locale.getCountry();
+  for (int32_t i = 0; i < available_count; ++i) {
+    const icu::Locale& candidate = available_locales[i];
+
+    int current_score = 0;
+    current_score +=
+        CompareLocaleSubtags(language, candidate.getLanguage()) *
+        language_weight;
+    current_score +=
+        CompareLocaleSubtags(script, candidate.getScript()) * script_weight;
+    current_score +=
+        CompareLocaleSubtags(region, candidate.getCountry()) * region_weight;
+
+    if (current_score >= threshold && current_score > result->score) {
+      result->score = current_score;
+      position = i;
+    }
+  }
+
+  // Didn't find any good matches so use defaults.
+  if (position == -1) return false;
+
+  return BuildLocaleName(available_locales[position].getBaseName(),
+                         input_locale.getName(), result);
+}
+
+// Maps a handful of unsupported language subtags to a closely related
+// supported language, which is a better fallback than the default locale.
+// Any other subtag is returned unchanged (the same pointer that was passed).
+static const char* GetLanguageException(const char* language) {
+  struct Fallback {
+    const char* unsupported;
+    const char* replacement;
+  };
+  static const Fallback kFallbacks[] = {
+    { "sh", "sr" },   // Serbo-Croatian to Serbian.
+    { "no", "nb" },   // Norwegian to Norwegian Bokmal.
+    { "mo", "ro" },   // Moldavian to Romanian.
+    { "tl", "fil" },  // Tagalog to Filipino.
+  };
+
+  const size_t count = sizeof(kFallbacks) / sizeof(kFallbacks[0]);
+  for (size_t i = 0; i < count; ++i) {
+    if (strcmp(language, kFallbacks[i].unsupported) == 0) {
+      return kFallbacks[i].replacement;
+    }
+  }
+
+  return language;
+}
+
+// Converts user input from BCP47 locale id format to ICU compatible format.
+//
+// locale_id: NUL-terminated BCP47 language tag supplied by the caller.
+// result:    output buffer; callers in this file pass a buffer of
+//            ULOC_FULLNAME_CAPACITY bytes.
+//
+// An over-long "-u-" extension is dropped before conversion (see the ICU bug
+// below), so only the base language is converted in that case.
+// Returns false if uloc_forLanguageTag fails or if the converted id filled
+// the buffer without room for the terminating NUL.
+static bool BCP47ToICUFormat(const char* locale_id, char* result) {
+  UErrorCode status = U_ZERO_ERROR;
+  int32_t locale_size = 0;
+
+  char locale[ULOC_FULLNAME_CAPACITY];
+  I18NUtils::StrNCopy(locale, ULOC_FULLNAME_CAPACITY, locale_id);
+  // StrNCopy is defined elsewhere; terminate explicitly so the local copy is
+  // a valid C string even when |locale_id| is longer than the buffer.
+  locale[ULOC_FULLNAME_CAPACITY - 1] = '\0';
+
+  // uloc_forLanguageTag has a bug where long extension can crash the code.
+  // We need to check if extension part of language id conforms to the length.
+  // ICU bug: http://bugs.icu-project.org/trac/ticket/8519
+  const char* extension = strstr(locale_id, "-u-");
+  if (extension != NULL &&
+      strlen(extension) > ULOC_KEYWORD_AND_VALUES_CAPACITY) {
+    // Truncate to get non-crashing string, but still preserve base language.
+    // The offset is measured in the untruncated input, so it can lie beyond
+    // the local buffer; writing there would corrupt the stack. In that case
+    // the bounded copy above never reached the extension at all.
+    const size_t base_length = static_cast<size_t>(extension - locale_id);
+    if (base_length < static_cast<size_t>(ULOC_FULLNAME_CAPACITY)) {
+      locale[base_length] = '\0';
+    }
+  }
+
+  uloc_forLanguageTag(locale, result, ULOC_FULLNAME_CAPACITY,
+                      &locale_size, &status);
+  // U_STRING_NOT_TERMINATED_WARNING is not a failure code, but the caller
+  // reads |result| as a C string, so an unterminated result is unusable.
+  return !U_FAILURE(status) && status != U_STRING_NOT_TERMINATED_WARNING;
+}
+
+// Compares two locale id subtags byte for byte (case-sensitive).
+// Returns 1 when they are identical and -1 when they differ, so the result
+// can be used directly as the sign of a scoring weight.
+static int CompareLocaleSubtags(const char* lsubtag, const char* rsubtag) {
+  if (strcmp(lsubtag, rsubtag) != 0) return -1;
+  return 1;
+}
+
+// Builds a BCP47 compliant locale id from base name of matched locale and
+// full user specified locale.
+//
+// base_name:         ICU base name (no keywords) of the matched locale.
+// input_locale_name: full ICU name of the requested locale; anything from
+//                    the keyword separator onwards is carried over.
+// result:            receives the combined ICU id in icu_id and its BCP47
+//                    form in bcp47_id. Both buffers are treated as
+//                    ULOC_FULLNAME_CAPACITY bytes, as elsewhere in this file.
+//
+// Returns false if uloc_toLanguageTag failed to convert locale id or the
+// converted id did not fit together with its terminating NUL.
+// Example:
+// base_name of matched locale (ICU ID): de_DE
+// input_locale_name (ICU ID): de_AT@collation=phonebk
+// result (ICU ID): de_DE@collation=phonebk
+// result (BCP47 ID): de-DE-u-co-phonebk
+static bool BuildLocaleName(const char* base_name,
+                            const char* input_locale_name,
+                            LocaleIDMatch* result) {
+  // A base name can carry script, region and variant subtags, so it needs
+  // the full-name capacity; ULOC_LANG_CAPACITY only covers the language
+  // subtag and would cut longer base names short.
+  I18NUtils::StrNCopy(result->icu_id, ULOC_FULLNAME_CAPACITY, base_name);
+
+  // Get extensions (if any) from the original locale.
+  const char* extension = strchr(input_locale_name, ULOC_KEYWORD_SEPARATOR);
+  if (extension != NULL) {
+    // Append right after the base name as it was actually stored, and never
+    // hand out more room than is left in the buffer.
+    int base_length = static_cast<int>(strlen(base_name));
+    if (base_length > ULOC_FULLNAME_CAPACITY - 1) {
+      base_length = ULOC_FULLNAME_CAPACITY - 1;
+    }
+    int room = ULOC_FULLNAME_CAPACITY - base_length;
+    if (room > ULOC_KEYWORD_AND_VALUES_CAPACITY) {
+      room = ULOC_KEYWORD_AND_VALUES_CAPACITY;
+    }
+    I18NUtils::StrNCopy(result->icu_id + base_length, room, extension);
+  }
+
+  // Convert ICU locale name into BCP47 format.
+  UErrorCode status = U_ZERO_ERROR;
+  uloc_toLanguageTag(result->icu_id, result->bcp47_id,
+                     ULOC_FULLNAME_CAPACITY, false, &status);
+  // An unterminated bcp47_id is reported as a warning, not a failure, but it
+  // cannot be used as a C string.
+  return !U_FAILURE(status) && status != U_STRING_NOT_TERMINATED_WARNING;
+}
+
+} } // namespace v8::internal
« no previous file with comments | « src/extensions/experimental/language-matcher.h ('k') | src/preparse-data-format.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698