| OLD | NEW |
| (Empty) | |
| 1 // Copyright (C) 2014 Google Inc. |
| 2 // |
| 3 // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 // you may not use this file except in compliance with the License. |
| 5 // You may obtain a copy of the License at |
| 6 // |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 // |
| 9 // Unless required by applicable law or agreed to in writing, software |
| 10 // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 // See the License for the specific language governing permissions and |
| 13 // limitations under the License. |
| 14 |
| 15 #include "language.h" |
| 16 |
| 17 #include <algorithm> |
| 18 #include <cctype> |
| 19 #include <string> |
| 20 #include <vector> |
| 21 |
| 22 #include "rule.h" |
| 23 #include "util/string_util.h" |
| 24 |
| 25 namespace i18n { |
| 26 namespace addressinput { |
| 27 |
| 28 Language::Language(const std::string& language_tag) : tag(language_tag), |
| 29 base(), |
| 30 has_latin_script(false) { |
| 31 // Character '-' is the separator for subtags in the BCP 47. However, some |
| 32 // legacy code generates tags with '_' instead of '-'. |
| 33 static const char kSubtagsSeparator = '-'; |
| 34 static const char kAlternativeSubtagsSeparator = '_'; |
| 35 std::replace(tag.begin(), tag.end(), kAlternativeSubtagsSeparator, |
| 36 kSubtagsSeparator); |
| 37 |
| 38 // OK to use 'tolower' because BCP 47 tags are always in ASCII. |
| 39 std::string lowercase = tag; |
| 40 std::transform(lowercase.begin(), lowercase.end(), lowercase.begin(), |
| 41 tolower); |
| 42 |
| 43 base = lowercase.substr(0, lowercase.find(kSubtagsSeparator)); |
| 44 |
| 45 // The lowercase BCP 47 subtag for Latin script. |
| 46 static const char kLowercaseLatinScript[] = "latn"; |
| 47 std::vector<std::string> subtags; |
| 48 SplitString(lowercase, kSubtagsSeparator, &subtags); |
| 49 |
| 50 // Support only the second and third position for the script. |
| 51 has_latin_script = |
| 52 (subtags.size() > 1 && subtags[1] == kLowercaseLatinScript) || |
| 53 (subtags.size() > 2 && subtags[2] == kLowercaseLatinScript); |
| 54 } |
| 55 |
| 56 Language::~Language() {} |
| 57 |
| 58 Language ChooseBestAddressLanguage(const Rule& address_region_rule, |
| 59 const Language& ui_language) { |
| 60 if (address_region_rule.GetLanguages().empty()) { |
| 61 return ui_language; |
| 62 } |
| 63 |
| 64 std::vector<Language> available_languages; |
| 65 for (std::vector<std::string>::const_iterator |
| 66 language_tag_it = address_region_rule.GetLanguages().begin(); |
| 67 language_tag_it != address_region_rule.GetLanguages().end(); |
| 68 ++language_tag_it) { |
| 69 available_languages.push_back(Language(*language_tag_it)); |
| 70 } |
| 71 |
| 72 if (ui_language.tag.empty()) { |
| 73 return available_languages.front(); |
| 74 } |
| 75 |
| 76 bool has_latin_format = !address_region_rule.GetLatinFormat().empty(); |
| 77 |
| 78 // The conventionally formatted BCP 47 Latin script with a preceding subtag |
| 79 // separator. |
| 80 static const char kLatinScriptSuffix[] = "-Latn"; |
| 81 Language latin_script_language( |
| 82 available_languages.front().base + kLatinScriptSuffix); |
| 83 if (has_latin_format && ui_language.has_latin_script) { |
| 84 return latin_script_language; |
| 85 } |
| 86 |
| 87 for (std::vector<Language>::const_iterator |
| 88 available_lang_it = available_languages.begin(); |
| 89 available_lang_it != available_languages.end(); ++available_lang_it) { |
| 90 // Base language comparison works because no region supports the same base |
| 91 // language with different scripts, for now. For example, no region supports |
| 92 // "zh-Hant" and "zh-Hans" at the same time. |
| 93 if (ui_language.base == available_lang_it->base) { |
| 94 return *available_lang_it; |
| 95 } |
| 96 } |
| 97 |
| 98 return has_latin_format ? latin_script_language : available_languages.front(); |
| 99 } |
| 100 |
| 101 } // namespace addressinput |
| 102 } // namespace i18n |
| OLD | NEW |