|
OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2011 the V8 project authors. All rights reserved. | |
2 // Redistribution and use in source and binary forms, with or without | |
3 // modification, are permitted provided that the following conditions are | |
4 // met: | |
5 // | |
6 // * Redistributions of source code must retain the above copyright | |
7 // notice, this list of conditions and the following disclaimer. | |
8 // * Redistributions in binary form must reproduce the above | |
9 // copyright notice, this list of conditions and the following | |
10 // disclaimer in the documentation and/or other materials provided | |
11 // with the distribution. | |
12 // * Neither the name of Google Inc. nor the names of its | |
13 // contributors may be used to endorse or promote products derived | |
14 // from this software without specific prior written permission. | |
15 // | |
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | |
28 // TODO(cira): Remove LanguageMatcher from v8 when ICU implements | |
29 // language matching API. | |
30 | |
31 #include "language-matcher.h" | |
32 | |
33 #include "unicode/locid.h" | |
34 #include "unicode/uloc.h" | |
35 #include "utils.h" // For Min/Max | |
36 | |
37 namespace v8 { | |
38 namespace internal { | |
39 | |
// Weights applied per subtag in CompareToSupportedLocaleIDList: each one is
// added to a candidate's rank when the subtag matches and subtracted when it
// does not. NOTE(review): despite the "Distance" suffix these act as
// scores/weights, not distances.
40 const unsigned int LanguageMatcher::kLanguageDistance = 75; | |
jungshik at Google
2011/05/02 19:57:23
nit: again, this is not a distance but a score/wei
Nebojša Ćirić
2011/05/02 22:44:01
Done.
| |
41 const unsigned int LanguageMatcher::kScriptDistance = 20; | |
42 const unsigned int LanguageMatcher::kRegionDistance = 5; | |
// Minimum rank a candidate needs in order to be reported as a match.
43 const unsigned int LanguageMatcher::kThreshold = 50; | |
// Step by which the per-position bonus shrinks for each later entry of a
// priority list, so earlier entries rank slightly higher.
44 const unsigned int LanguageMatcher::kPositionBonus = 1; | |
// Locale id stored in a LocaleIDMatch when nothing better was found.
45 const char* const LanguageMatcher::kDefaultLocale = "root"; | |
46 | |
// Forward declarations of the file-local helpers defined further down.
// Maps selected language subtags to a replacement language.
47 static const char* GetLanguageException(const char*); | |
// Converts a BCP47 locale id into ICU format, writing into the char buffer.
48 static void BCP47ToICUFormat(v8::Handle<v8::String>, char*); | |
// Returns 1 when the two subtags are equal, -1 otherwise.
49 static int CompareLocaleSubtags(const char*, const char*); | |
// Fills the ICU and BCP47 names of the given LocaleIDMatch.
50 static void BuildLocaleName(const char*, const char*, LocaleIDMatch*); | |
51 | |
52 LocaleIDMatch::LocaleIDMatch() | |
53 : rank(-1) { | |
54 snprintf(bcp47_locale, ULOC_FULLNAME_CAPACITY, | |
55 "%s", LanguageMatcher::kDefaultLocale); | |
56 snprintf(icu_locale, ULOC_FULLNAME_CAPACITY, | |
57 "%s", LanguageMatcher::kDefaultLocale); | |
58 } | |
59 | |
60 LocaleIDMatch& LocaleIDMatch::operator=(const LocaleIDMatch& rhs) { | |
61 snprintf(this->bcp47_locale, ULOC_FULLNAME_CAPACITY, | |
62 "%s", rhs.bcp47_locale); | |
63 snprintf(this->icu_locale, ULOC_FULLNAME_CAPACITY, | |
64 "%s", rhs.icu_locale); | |
65 this->rank = rhs.rank; | |
66 } | |
67 | |
68 // static | |
69 void LanguageMatcher::GetBestMatchForPriorityList( | |
70 v8::Handle<v8::Array> locales, LocaleIDMatch* result) { | |
71 v8::HandleScope handle_scope; | |
72 | |
73 unsigned int position_bonus = locales->Length() * kPositionBonus; | |
74 | |
75 int max_rank = 0; | |
76 LocaleIDMatch match; | |
77 for (unsigned int i = 0; i < locales->Length(); ++i) { | |
78 position_bonus -= kPositionBonus; | |
79 | |
80 v8::TryCatch try_catch; | |
81 v8::Local<v8::Value> locale_id = locales->Get(v8::Integer::New(i)); | |
82 | |
83 // Return default if exception is raised when reading parameter. | |
84 if (try_catch.HasCaught()) break; | |
85 | |
86 // JavaScript arrays can be heterogenous so check each item | |
87 // if it's a string. | |
88 if (!locale_id->IsString()) continue; | |
89 | |
90 CompareToSupportedLocaleIDList(locale_id->ToString(), &match); | |
91 | |
92 // Skip items under threshold. | |
93 if (match.rank < kThreshold) continue; | |
94 | |
95 match.rank += position_bonus; | |
96 if (match.rank > max_rank) { | |
97 *result = match; | |
98 | |
99 max_rank = match.rank; | |
100 } | |
101 } | |
102 } | |
103 | |
104 // static | |
105 void LanguageMatcher::GetBestMatchForString( | |
106 v8::Handle<v8::String> locale, LocaleIDMatch* result) { | |
107 LocaleIDMatch match; | |
108 CompareToSupportedLocaleIDList(locale, &match); | |
109 | |
110 if (match.rank >= kThreshold) { | |
111 *result = match; | |
112 } | |
113 } | |
114 | |
115 // static | |
116 void LanguageMatcher::CompareToSupportedLocaleIDList( | |
117 v8::Handle<v8::String> locale_id, LocaleIDMatch* result) { | |
118 static int32_t available_count = 0; | |
119 static const icu::Locale* available_locales = | |
120 icu::Locale::getAvailableLocales(available_count); | |
jungshik at Google
2011/05/02 19:57:23
Don't we need any lock around here?
More importan
Nebojša Ćirić
2011/05/02 22:44:01
Added a TODO, but I don't think it's an issue. One
| |
121 | |
122 char locale[ULOC_FULLNAME_CAPACITY]; | |
123 BCP47ToICUFormat(locale_id, locale); | |
124 icu::Locale user_locale(locale); | |
jungshik at Google
2011/05/02 19:57:23
how about input_locale?
Nebojša Ćirić
2011/05/02 22:44:01
Done.
| |
125 | |
126 // Position of the best match locale in list of available locales. | |
127 int position = -1; | |
128 result->rank = 0; | |
129 const char* language = GetLanguageException(user_locale.getLanguage()); | |
130 const char* script = user_locale.getScript(); | |
131 const char* region = user_locale.getCountry(); | |
132 for (int32_t i = 0; i < available_count; ++i) { | |
133 int current_rank = 0; | |
134 int sign = | |
135 CompareLocaleSubtags(language, available_locales[i].getLanguage()); | |
136 current_rank += sign * kLanguageDistance; | |
137 | |
138 sign = CompareLocaleSubtags(script, available_locales[i].getScript()); | |
139 current_rank += sign * kScriptDistance; | |
140 | |
141 sign = CompareLocaleSubtags(region, available_locales[i].getCountry()); | |
142 current_rank += sign * kRegionDistance; | |
143 | |
144 if (current_rank > result->rank) { | |
145 result->rank = current_rank; | |
146 position = i; | |
147 } | |
148 } | |
149 | |
150 if (result->rank < kThreshold || position == -1) { | |
151 LocaleIDMatch default_match; | |
152 *result = default_match; | |
153 return; | |
154 } | |
155 | |
156 BuildLocaleName(available_locales[position].getBaseName(), | |
157 user_locale.getName(), result); | |
158 } | |
159 | |
// Some language subtags are better served by a closely related language
// than by the default locale. Returns the replacement subtag for those, and
// |language| itself (the same pointer) for everything else.
static const char* GetLanguageException(const char* language) {
  static const struct {
    const char* requested;
    const char* replacement;
  } kLanguageExceptions[] = {
    { "sh", "sr" },   // Serbo-Croatian => Serbian
    { "no", "nb" },   // Norwegian => Norwegian Bokmal
    { "mo", "ro" },   // Moldavian => Romanian
    { "tl", "fil" },  // Tagalog => Filipino
  };
  const int exception_count =
      sizeof(kLanguageExceptions) / sizeof(kLanguageExceptions[0]);

  for (int i = 0; i < exception_count; ++i) {
    if (strcmp(language, kLanguageExceptions[i].requested) == 0) {
      return kLanguageExceptions[i].replacement;
    }
  }

  return language;
}
170 | |
171 // Converts user input from BCP47 locale id format to ICU compatible format. | |
172 static void BCP47ToICUFormat(v8::Handle<v8::String> locale_id, char* locale) { | |
173 UErrorCode status = U_ZERO_ERROR; | |
174 int32_t locale_size = 0; | |
175 uloc_forLanguageTag(*v8::String::Utf8Value(locale_id), locale, | |
jungshik at Google
2011/05/02 19:57:23
ASCIIValue?
Nebojša Ćirić
2011/05/02 22:44:01
Already done in parent method.
On 2011/05/02 19:5
| |
176 ULOC_FULLNAME_CAPACITY, &locale_size, &status); | |
177 } | |
178 | |
// Compares two locale id subtags with strcmp.
// Returns 1 for a match or -1 for a mismatch.
static int CompareLocaleSubtags(const char* lsubtag, const char* rsubtag) {
  if (strcmp(lsubtag, rsubtag) != 0) {
    return -1;
  }
  return 1;
}
184 | |
185 // Builds a BCP47 compliant locale id from base name of matched locale and | |
186 // full user specified locale. | |
187 // Example: | |
188 // base_name: de_DE | |
jungshik at Google
2011/05/02 19:57:23
base_name of the matched locale (ICU ID): de_DE
Nebojša Ćirić
2011/05/02 22:44:01
Done.
| |
189 // user_locale_name: de_AT@collation=phonebk | |
jungshik at Google
2011/05/02 19:57:23
input_locale_name (ICU ID)
Nebojša Ćirić
2011/05/02 22:44:01
Done.
| |
190 // ICU compatible result: de_DE@collation=phonebk | |
jungshik at Google
2011/05/02 19:57:23
result (ICU ID)
Nebojša Ćirić
2011/05/02 22:44:01
Done.
| |
191 // BCP47 compatible result: de-DE-u-co-phonebk | |
jungshik at Google
2011/05/02 19:57:23
result (BCP 47)
Nebojša Ćirić
2011/05/02 22:44:01
Done.
| |
192 static void BuildLocaleName(const char* base_name, | |
193 const char* user_locale_name, | |
194 LocaleIDMatch* result) { | |
195 // Get extensions (if any) from the original locale. | |
196 const char* extension = strchr(user_locale_name, ULOC_KEYWORD_SEPARATOR); | |
197 if (extension != NULL) { | |
198 snprintf(result->icu_locale, ULOC_FULLNAME_CAPACITY, | |
199 "%s%s", base_name, extension); | |
200 } else { | |
201 snprintf(result->icu_locale, ULOC_FULLNAME_CAPACITY, "%s", base_name); | |
202 } | |
203 | |
204 // Convert ICU locale name into BCP47 format. | |
205 UErrorCode status = U_ZERO_ERROR; | |
206 uloc_toLanguageTag(result->icu_locale, result->bcp47_locale, | |
207 ULOC_FULLNAME_CAPACITY, false, &status); | |
jungshik at Google
2011/05/02 19:57:23
Instead of ignoring status, either assert U_SUCCES
Nebojša Ćirić
2011/05/02 22:44:01
Done.
| |
208 } | |
209 | |
210 } } // namespace v8::internal | |
OLD | NEW |