Chromium Code Reviews

Side by Side Diff: src/extensions/experimental/language-matcher.cc

Issue 6901141: Change v8Locale to match proposal - constructor is different and I've added (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: strcpy->snprintf Created 9 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | | Annotate | Revision Log
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 // TODO(cira): Remove LanguageMatcher from v8 when ICU implements
29 // language matching API.
30
31 #include "language-matcher.h"
32
33 #include "unicode/locid.h"
34 #include "unicode/uloc.h"
35 #include "utils.h" // For Min/Max
36
37 namespace v8 {
38 namespace internal {
39
40 const unsigned int LanguageMatcher::kLanguageDistance = 75;
jungshik at Google 2011/05/02 19:57:23 nit: again, this is not a distance but a score/wei
Nebojša Ćirić 2011/05/02 22:44:01 Done.
41 const unsigned int LanguageMatcher::kScriptDistance = 20;
42 const unsigned int LanguageMatcher::kRegionDistance = 5;
43 const unsigned int LanguageMatcher::kThreshold = 50;
44 const unsigned int LanguageMatcher::kPositionBonus = 1;
45 const char* const LanguageMatcher::kDefaultLocale = "root";
46
47 static const char* GetLanguageException(const char*);
48 static void BCP47ToICUFormat(v8::Handle<v8::String>, char*);
49 static int CompareLocaleSubtags(const char*, const char*);
50 static void BuildLocaleName(const char*, const char*, LocaleIDMatch*);
51
52 LocaleIDMatch::LocaleIDMatch()
53 : rank(-1) {
54 snprintf(bcp47_locale, ULOC_FULLNAME_CAPACITY,
55 "%s", LanguageMatcher::kDefaultLocale);
56 snprintf(icu_locale, ULOC_FULLNAME_CAPACITY,
57 "%s", LanguageMatcher::kDefaultLocale);
58 }
59
60 LocaleIDMatch& LocaleIDMatch::operator=(const LocaleIDMatch& rhs) {
61 snprintf(this->bcp47_locale, ULOC_FULLNAME_CAPACITY,
62 "%s", rhs.bcp47_locale);
63 snprintf(this->icu_locale, ULOC_FULLNAME_CAPACITY,
64 "%s", rhs.icu_locale);
65 this->rank = rhs.rank;
66 }
67
68 // static
69 void LanguageMatcher::GetBestMatchForPriorityList(
70 v8::Handle<v8::Array> locales, LocaleIDMatch* result) {
71 v8::HandleScope handle_scope;
72
73 unsigned int position_bonus = locales->Length() * kPositionBonus;
74
75 int max_rank = 0;
76 LocaleIDMatch match;
77 for (unsigned int i = 0; i < locales->Length(); ++i) {
78 position_bonus -= kPositionBonus;
79
80 v8::TryCatch try_catch;
81 v8::Local<v8::Value> locale_id = locales->Get(v8::Integer::New(i));
82
83 // Return default if exception is raised when reading parameter.
84 if (try_catch.HasCaught()) break;
85
86 // JavaScript arrays can be heterogenous so check each item
87 // if it's a string.
88 if (!locale_id->IsString()) continue;
89
90 CompareToSupportedLocaleIDList(locale_id->ToString(), &match);
91
92 // Skip items under threshold.
93 if (match.rank < kThreshold) continue;
94
95 match.rank += position_bonus;
96 if (match.rank > max_rank) {
97 *result = match;
98
99 max_rank = match.rank;
100 }
101 }
102 }
103
104 // static
105 void LanguageMatcher::GetBestMatchForString(
106 v8::Handle<v8::String> locale, LocaleIDMatch* result) {
107 LocaleIDMatch match;
108 CompareToSupportedLocaleIDList(locale, &match);
109
110 if (match.rank >= kThreshold) {
111 *result = match;
112 }
113 }
114
115 // static
116 void LanguageMatcher::CompareToSupportedLocaleIDList(
117 v8::Handle<v8::String> locale_id, LocaleIDMatch* result) {
118 static int32_t available_count = 0;
119 static const icu::Locale* available_locales =
120 icu::Locale::getAvailableLocales(available_count);
jungshik at Google 2011/05/02 19:57:23 Don't we need any lock around here? More importan
Nebojša Ćirić 2011/05/02 22:44:01 Added a TODO, but I don't think it's an issue. One
121
122 char locale[ULOC_FULLNAME_CAPACITY];
123 BCP47ToICUFormat(locale_id, locale);
124 icu::Locale user_locale(locale);
jungshik at Google 2011/05/02 19:57:23 how about input_locale?
Nebojša Ćirić 2011/05/02 22:44:01 Done.
125
126 // Position of the best match locale in list of available locales.
127 int position = -1;
128 result->rank = 0;
129 const char* language = GetLanguageException(user_locale.getLanguage());
130 const char* script = user_locale.getScript();
131 const char* region = user_locale.getCountry();
132 for (int32_t i = 0; i < available_count; ++i) {
133 int current_rank = 0;
134 int sign =
135 CompareLocaleSubtags(language, available_locales[i].getLanguage());
136 current_rank += sign * kLanguageDistance;
137
138 sign = CompareLocaleSubtags(script, available_locales[i].getScript());
139 current_rank += sign * kScriptDistance;
140
141 sign = CompareLocaleSubtags(region, available_locales[i].getCountry());
142 current_rank += sign * kRegionDistance;
143
144 if (current_rank > result->rank) {
145 result->rank = current_rank;
146 position = i;
147 }
148 }
149
150 if (result->rank < kThreshold || position == -1) {
151 LocaleIDMatch default_match;
152 *result = default_match;
153 return;
154 }
155
156 BuildLocaleName(available_locales[position].getBaseName(),
157 user_locale.getName(), result);
158 }
159
// Maps certain language subtags to a related language subtag so that the
// matcher can fall back to the related language instead of the default
// locale. Any subtag not listed in the table is returned unchanged (the
// same pointer that was passed in).
static const char* GetLanguageException(const char* language) {
  static const struct {
    const char* unsupported;
    const char* replacement;
  } kLanguageExceptions[] = {
    { "sh", "sr" },   // Serbo-Croatian => Serbian
    { "no", "nb" },   // Norwegian => Norwegian Bokmal
    { "mo", "ro" },   // Moldavian => Romanian
    { "tl", "fil" },  // Tagalog => Filipino
  };

  for (unsigned int i = 0;
       i < sizeof(kLanguageExceptions) / sizeof(kLanguageExceptions[0]);
       ++i) {
    if (strcmp(language, kLanguageExceptions[i].unsupported) == 0) {
      return kLanguageExceptions[i].replacement;
    }
  }

  return language;
}
170
171 // Converts user input from BCP47 locale id format to ICU compatible format.
172 static void BCP47ToICUFormat(v8::Handle<v8::String> locale_id, char* locale) {
173 UErrorCode status = U_ZERO_ERROR;
174 int32_t locale_size = 0;
175 uloc_forLanguageTag(*v8::String::Utf8Value(locale_id), locale,
jungshik at Google 2011/05/02 19:57:23 ASCIIValue?
Nebojša Ćirić 2011/05/02 22:44:01 Already done in parent method. On 2011/05/02 19:5
176 ULOC_FULLNAME_CAPACITY, &locale_size, &status);
177 }
178
// Compares two locale id subtags for exact (case-sensitive) equality.
// Returns 1 for match or -1 for mismatch.
static int CompareLocaleSubtags(const char* lsubtag, const char* rsubtag) {
  if (strcmp(lsubtag, rsubtag) != 0) {
    return -1;
  }
  return 1;
}
184
185 // Builds a BCP47 compliant locale id from base name of matched locale and
186 // full user specified locale.
187 // Example:
188 // base_name: de_DE
jungshik at Google 2011/05/02 19:57:23 base_name of the matched locale (ICU ID): de_DE
Nebojša Ćirić 2011/05/02 22:44:01 Done.
189 // user_locale_name: de_AT@collation=phonebk
jungshik at Google 2011/05/02 19:57:23 input_locale_name (ICU ID)
Nebojša Ćirić 2011/05/02 22:44:01 Done.
190 // ICU compatible result: de_DE@collation=phonebk
jungshik at Google 2011/05/02 19:57:23 result (ICU ID)
Nebojša Ćirić 2011/05/02 22:44:01 Done.
191 // BCP47 compatible result: de-DE-u-co-phonebk
jungshik at Google 2011/05/02 19:57:23 result (BCP 47)
Nebojša Ćirić 2011/05/02 22:44:01 Done.
192 static void BuildLocaleName(const char* base_name,
193 const char* user_locale_name,
194 LocaleIDMatch* result) {
195 // Get extensions (if any) from the original locale.
196 const char* extension = strchr(user_locale_name, ULOC_KEYWORD_SEPARATOR);
197 if (extension != NULL) {
198 snprintf(result->icu_locale, ULOC_FULLNAME_CAPACITY,
199 "%s%s", base_name, extension);
200 } else {
201 snprintf(result->icu_locale, ULOC_FULLNAME_CAPACITY, "%s", base_name);
202 }
203
204 // Convert ICU locale name into BCP47 format.
205 UErrorCode status = U_ZERO_ERROR;
206 uloc_toLanguageTag(result->icu_locale, result->bcp47_locale,
207 ULOC_FULLNAME_CAPACITY, false, &status);
jungshik at Google 2011/05/02 19:57:23 Instead of ignoring status, either assert U_SUCCES
Nebojša Ćirić 2011/05/02 22:44:01 Done.
208 }
209
210 } } // namespace v8::internal
OLDNEW

Powered by Google App Engine