OLD | NEW |
| (Empty) |
1 // Copyright (C) 2014 Google Inc. | |
2 // | |
3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
4 // you may not use this file except in compliance with the License. | |
5 // You may obtain a copy of the License at | |
6 // | |
7 // http://www.apache.org/licenses/LICENSE-2.0 | |
8 // | |
9 // Unless required by applicable law or agreed to in writing, software | |
10 // distributed under the License is distributed on an "AS IS" BASIS, | |
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 // See the License for the specific language governing permissions and | |
13 // limitations under the License. | |
14 | |
15 #include "language.h" | |
16 | |
17 #include <algorithm> | |
18 #include <cctype> | |
19 #include <string> | |
20 #include <vector> | |
21 | |
22 #include "rule.h" | |
23 #include "util/string_util.h" | |
24 | |
25 namespace i18n { | |
26 namespace addressinput { | |
27 | |
28 Language::Language(const std::string& language_tag) : tag(language_tag), | |
29 base(), | |
30 has_latin_script(false) { | |
31 // Character '-' is the separator for subtags in the BCP 47. However, some | |
32 // legacy code generates tags with '_' instead of '-'. | |
33 static const char kSubtagsSeparator = '-'; | |
34 static const char kAlternativeSubtagsSeparator = '_'; | |
35 std::replace(tag.begin(), tag.end(), kAlternativeSubtagsSeparator, | |
36 kSubtagsSeparator); | |
37 | |
38 // OK to use 'tolower' because BCP 47 tags are always in ASCII. | |
39 std::string lowercase = tag; | |
40 std::transform(lowercase.begin(), lowercase.end(), lowercase.begin(), | |
41 tolower); | |
42 | |
43 base = lowercase.substr(0, lowercase.find(kSubtagsSeparator)); | |
44 | |
45 // The lowercase BCP 47 subtag for Latin script. | |
46 static const char kLowercaseLatinScript[] = "latn"; | |
47 std::vector<std::string> subtags; | |
48 SplitString(lowercase, kSubtagsSeparator, &subtags); | |
49 | |
50 // Support only the second and third position for the script. | |
51 has_latin_script = | |
52 (subtags.size() > 1 && subtags[1] == kLowercaseLatinScript) || | |
53 (subtags.size() > 2 && subtags[2] == kLowercaseLatinScript); | |
54 } | |
55 | |
56 Language::~Language() {} | |
57 | |
58 Language ChooseBestAddressLanguage(const Rule& address_region_rule, | |
59 const Language& ui_language) { | |
60 if (address_region_rule.GetLanguages().empty()) { | |
61 return ui_language; | |
62 } | |
63 | |
64 std::vector<Language> available_languages; | |
65 for (std::vector<std::string>::const_iterator | |
66 language_tag_it = address_region_rule.GetLanguages().begin(); | |
67 language_tag_it != address_region_rule.GetLanguages().end(); | |
68 ++language_tag_it) { | |
69 available_languages.push_back(Language(*language_tag_it)); | |
70 } | |
71 | |
72 if (ui_language.tag.empty()) { | |
73 return available_languages.front(); | |
74 } | |
75 | |
76 bool has_latin_format = !address_region_rule.GetLatinFormat().empty(); | |
77 | |
78 // The conventionally formatted BCP 47 Latin script with a preceding subtag | |
79 // separator. | |
80 static const char kLatinScriptSuffix[] = "-Latn"; | |
81 Language latin_script_language( | |
82 available_languages.front().base + kLatinScriptSuffix); | |
83 if (has_latin_format && ui_language.has_latin_script) { | |
84 return latin_script_language; | |
85 } | |
86 | |
87 for (std::vector<Language>::const_iterator | |
88 available_lang_it = available_languages.begin(); | |
89 available_lang_it != available_languages.end(); ++available_lang_it) { | |
90 // Base language comparison works because no region supports the same base | |
91 // language with different scripts, for now. For example, no region supports | |
92 // "zh-Hant" and "zh-Hans" at the same time. | |
93 if (ui_language.base == available_lang_it->base) { | |
94 return *available_lang_it; | |
95 } | |
96 } | |
97 | |
98 return has_latin_format ? latin_script_language : available_languages.front(); | |
99 } | |
100 | |
101 } // namespace addressinput | |
102 } // namespace i18n | |
OLD | NEW |