Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(80)

Side by Side Diff: src/extensions/experimental/language-matcher.cc

Issue 6967005: Add new files missing in previous commit. (Closed) Base URL: https://v8.googlecode.com/svn/trunk
Patch Set: Add missing new files from previous commit. Created 9 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/extensions/experimental/language-matcher.h ('k') | src/preparse-data-format.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 // TODO(cira): Remove LanguageMatcher from v8 when ICU implements
29 // language matching API.
30
31 #include "language-matcher.h"
32
33 #include "i18n-utils.h"
34 #include "unicode/datefmt.h" // For getAvailableLocales
35 #include "unicode/locid.h"
36 #include "unicode/uloc.h"
37 #include "utils.h"
38
39 namespace v8 {
40 namespace internal {
41
42 const unsigned int LanguageMatcher::kLanguageWeight = 75;
43 const unsigned int LanguageMatcher::kScriptWeight = 20;
44 const unsigned int LanguageMatcher::kRegionWeight = 5;
45 const unsigned int LanguageMatcher::kThreshold = 50;
46 const unsigned int LanguageMatcher::kPositionBonus = 1;
47 const char* const LanguageMatcher::kDefaultLocale = "root";
48
49 static const char* GetLanguageException(const char*);
50 static bool BCP47ToICUFormat(const char*, char*);
51 static int CompareLocaleSubtags(const char*, const char*);
52 static bool BuildLocaleName(const char*, const char*, LocaleIDMatch*);
53
54 LocaleIDMatch::LocaleIDMatch()
55 : score(-1) {
56 I18NUtils::StrNCopy(
57 bcp47_id, ULOC_FULLNAME_CAPACITY, LanguageMatcher::kDefaultLocale);
58
59 I18NUtils::StrNCopy(
60 icu_id, ULOC_FULLNAME_CAPACITY, LanguageMatcher::kDefaultLocale);
61 }
62
63 LocaleIDMatch& LocaleIDMatch::operator=(const LocaleIDMatch& rhs) {
64 I18NUtils::StrNCopy(this->bcp47_id, ULOC_FULLNAME_CAPACITY, rhs.bcp47_id);
65 I18NUtils::StrNCopy(this->icu_id, ULOC_FULLNAME_CAPACITY, rhs.icu_id);
66 this->score = rhs.score;
67
68 return *this;
69 }
70
71 // static
72 void LanguageMatcher::GetBestMatchForPriorityList(
73 v8::Handle<v8::Array> locales, LocaleIDMatch* result) {
74 v8::HandleScope handle_scope;
75
76 unsigned int position_bonus = locales->Length() * kPositionBonus;
77
78 int max_score = 0;
79 LocaleIDMatch match;
80 for (unsigned int i = 0; i < locales->Length(); ++i) {
81 position_bonus -= kPositionBonus;
82
83 v8::TryCatch try_catch;
84 v8::Local<v8::Value> locale_id = locales->Get(v8::Integer::New(i));
85
86 // Return default if exception is raised when reading parameter.
87 if (try_catch.HasCaught()) break;
88
89 // JavaScript arrays can be heterogenous so check each item
90 // if it's a string.
91 if (!locale_id->IsString()) continue;
92
93 if (!CompareToSupportedLocaleIDList(locale_id->ToString(), &match)) {
94 continue;
95 }
96
97 // Skip items under threshold.
98 if (match.score < kThreshold) continue;
99
100 match.score += position_bonus;
101 if (match.score > max_score) {
102 *result = match;
103
104 max_score = match.score;
105 }
106 }
107 }
108
109 // static
110 void LanguageMatcher::GetBestMatchForString(
111 v8::Handle<v8::String> locale, LocaleIDMatch* result) {
112 LocaleIDMatch match;
113
114 if (CompareToSupportedLocaleIDList(locale, &match) &&
115 match.score >= kThreshold) {
116 *result = match;
117 }
118 }
119
120 // static
121 bool LanguageMatcher::CompareToSupportedLocaleIDList(
122 v8::Handle<v8::String> locale_id, LocaleIDMatch* result) {
123 static int32_t available_count = 0;
124 // Depending on how ICU data is built, locales returned by
125 // Locale::getAvailableLocale() are not guaranteed to support DateFormat,
126 // Collation and other services. We can call getAvailableLocale() of all the
127 // services we want to support and take the intersection of them all, but
128 // using DateFormat::getAvailableLocales() should suffice.
129 // TODO(cira): Maybe make this thread-safe?
130 static const icu::Locale* available_locales =
131 icu::DateFormat::getAvailableLocales(available_count);
132
133 // Skip this locale_id if it's not in ASCII.
134 static LocaleIDMatch default_match;
135 v8::String::AsciiValue ascii_value(locale_id);
136 if (*ascii_value == NULL) return false;
137
138 char locale[ULOC_FULLNAME_CAPACITY];
139 if (!BCP47ToICUFormat(*ascii_value, locale)) return false;
140
141 icu::Locale input_locale(locale);
142
143 // Position of the best match locale in list of available locales.
144 int position = -1;
145 const char* language = GetLanguageException(input_locale.getLanguage());
146 const char* script = input_locale.getScript();
147 const char* region = input_locale.getCountry();
148 for (int32_t i = 0; i < available_count; ++i) {
149 int current_score = 0;
150 int sign =
151 CompareLocaleSubtags(language, available_locales[i].getLanguage());
152 current_score += sign * kLanguageWeight;
153
154 sign = CompareLocaleSubtags(script, available_locales[i].getScript());
155 current_score += sign * kScriptWeight;
156
157 sign = CompareLocaleSubtags(region, available_locales[i].getCountry());
158 current_score += sign * kRegionWeight;
159
160 if (current_score >= kThreshold && current_score > result->score) {
161 result->score = current_score;
162 position = i;
163 }
164 }
165
166 // Didn't find any good matches so use defaults.
167 if (position == -1) return false;
168
169 return BuildLocaleName(available_locales[position].getBaseName(),
170 input_locale.getName(), result);
171 }
172
// For some unsupported language subtags it is better to fall back to a
// related language that is supported than to the default locale.
// Returns the replacement subtag for a known exception, or |language| itself
// (the same pointer) when there is none.
static const char* GetLanguageException(const char* language) {
  static const struct {
    const char* unsupported;
    const char* replacement;
  } kExceptions[] = {
    { "sh", "sr" },   // Serbo-Croatian to Serbian.
    { "no", "nb" },   // Norwegian to Norwegian Bokmal.
    { "mo", "ro" },   // Moldavian to Romanian.
    { "tl", "fil" },  // Tagalog to Filipino.
  };
  const size_t exception_count = sizeof(kExceptions) / sizeof(kExceptions[0]);

  for (size_t i = 0; i < exception_count; ++i) {
    if (strcmp(language, kExceptions[i].unsupported) == 0) {
      return kExceptions[i].replacement;
    }
  }

  return language;
}
190
191 // Converts user input from BCP47 locale id format to ICU compatible format.
192 // Returns false if uloc_forLanguageTag call fails or if extension is too long.
193 static bool BCP47ToICUFormat(const char* locale_id, char* result) {
194 UErrorCode status = U_ZERO_ERROR;
195 int32_t locale_size = 0;
196
197 char locale[ULOC_FULLNAME_CAPACITY];
198 I18NUtils::StrNCopy(locale, ULOC_FULLNAME_CAPACITY, locale_id);
199
200 // uloc_forLanguageTag has a bug where long extension can crash the code.
201 // We need to check if extension part of language id conforms to the length.
202 // ICU bug: http://bugs.icu-project.org/trac/ticket/8519
203 const char* extension = strstr(locale_id, "-u-");
204 if (extension != NULL &&
205 strlen(extension) > ULOC_KEYWORD_AND_VALUES_CAPACITY) {
206 // Truncate to get non-crashing string, but still preserve base language.
207 int base_length = strlen(locale_id) - strlen(extension);
208 locale[base_length] = '\0';
209 }
210
211 uloc_forLanguageTag(locale, result, ULOC_FULLNAME_CAPACITY,
212 &locale_size, &status);
213 return !U_FAILURE(status);
214 }
215
// Compares two locale id subtags byte for byte (case-sensitive).
// Returns 1 when they are identical and -1 when they differ, so the result
// can be multiplied directly by a subtag weight.
static int CompareLocaleSubtags(const char* lsubtag, const char* rsubtag) {
  const bool identical = (strcmp(lsubtag, rsubtag) == 0);
  if (identical) return 1;
  return -1;
}
221
222 // Builds a BCP47 compliant locale id from base name of matched locale and
223 // full user specified locale.
224 // Returns false if uloc_toLanguageTag failed to convert locale id.
225 // Example:
226 // base_name of matched locale (ICU ID): de_DE
227 // input_locale_name (ICU ID): de_AT@collation=phonebk
228 // result (ICU ID): de_DE@collation=phonebk
229 // result (BCP47 ID): de-DE-u-co-phonebk
230 static bool BuildLocaleName(const char* base_name,
231 const char* input_locale_name,
232 LocaleIDMatch* result) {
233 I18NUtils::StrNCopy(result->icu_id, ULOC_LANG_CAPACITY, base_name);
234
235 // Get extensions (if any) from the original locale.
236 const char* extension = strchr(input_locale_name, ULOC_KEYWORD_SEPARATOR);
237 if (extension != NULL) {
238 I18NUtils::StrNCopy(result->icu_id + strlen(base_name),
239 ULOC_KEYWORD_AND_VALUES_CAPACITY, extension);
240 } else {
241 I18NUtils::StrNCopy(result->icu_id, ULOC_LANG_CAPACITY, base_name);
242 }
243
244 // Convert ICU locale name into BCP47 format.
245 UErrorCode status = U_ZERO_ERROR;
246 uloc_toLanguageTag(result->icu_id, result->bcp47_id,
247 ULOC_FULLNAME_CAPACITY, false, &status);
248 return !U_FAILURE(status);
249 }
250
251 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/extensions/experimental/language-matcher.h ('k') | src/preparse-data-format.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698