OLD | NEW |
(Empty) | |
| 1 // Copyright (C) 2014 Google Inc. |
| 2 // |
| 3 // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 // you may not use this file except in compliance with the License. |
| 5 // You may obtain a copy of the License at |
| 6 // |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 // |
| 9 // Unless required by applicable law or agreed to in writing, software |
| 10 // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 // See the License for the specific language governing permissions and |
| 13 // limitations under the License. |
| 14 |
| 15 #include <libaddressinput/address_formatter.h> |
| 16 |
| 17 #include <libaddressinput/address_data.h> |
| 18 #include <libaddressinput/address_field.h> |
| 19 |
| 20 #include <algorithm> |
| 21 #include <cassert> |
| 22 #include <cstddef> |
| 23 #include <functional> |
| 24 #include <string> |
| 25 #include <vector> |
| 26 |
| 27 #include "language.h" |
| 28 #include "region_data_constants.h" |
| 29 #include "rule.h" |
| 30 #include "util/cctype_tolower_equal.h" |
| 31 |
| 32 namespace i18n { |
| 33 namespace addressinput { |
| 34 |
| 35 namespace { |
| 36 |
// Separators that can be placed between address lines when several lines are
// combined into a single line.
const char kCommaSeparator[] = ", ";
const char kSpaceSeparator[] = " ";
const char kArabicCommaSeparator[] = "\xD8\x8C" " ";  /* "، " */

// Base languages whose address lines are combined using a single space.
// The table is read-only, so both the strings and the pointers are const.
const char* const kLanguagesThatUseSpace[] = {
  "th",
  "ko"
};

// Base languages whose address lines are combined without any separator.
const char* const kLanguagesThatHaveNoSeparator[] = {
  "ja",
  "zh"  // All Chinese variants.
};

// Base languages whose address lines are combined using an Arabic comma.
// This data is based on CLDR, for languages that are in official use in some
// country, where Arabic is the most likely script tag.
// TODO: Consider supporting variants such as tr-Arab by detecting the script
// code.
const char* const kLanguagesThatUseAnArabicComma[] = {
  "ar",
  "az",
  "fa",
  "kk",
  "ku",
  "ky",
  "ps",
  "tg",
  "tk",
  "ur",
  "uz"
};
| 68 |
| 69 std::string GetLineSeparatorForLanguage(const std::string& language_tag) { |
| 70 Language address_language(language_tag); |
| 71 |
| 72 // First deal with explicit script tags. |
| 73 if (address_language.has_latin_script) { |
| 74 return kCommaSeparator; |
| 75 } |
| 76 |
| 77 // Now guess something appropriate based on the base language. |
| 78 const std::string& base_language = address_language.base; |
| 79 if (std::find_if(kLanguagesThatUseSpace, |
| 80 kLanguagesThatUseSpace + arraysize(kLanguagesThatUseSpace), |
| 81 std::bind2nd(EqualToTolowerString(), base_language)) != |
| 82 kLanguagesThatUseSpace + arraysize(kLanguagesThatUseSpace)) { |
| 83 return kSpaceSeparator; |
| 84 } else if (std::find_if( |
| 85 kLanguagesThatHaveNoSeparator, |
| 86 kLanguagesThatHaveNoSeparator + |
| 87 arraysize(kLanguagesThatHaveNoSeparator), |
| 88 std::bind2nd(EqualToTolowerString(), base_language)) != |
| 89 kLanguagesThatHaveNoSeparator + |
| 90 arraysize(kLanguagesThatHaveNoSeparator)) { |
| 91 return ""; |
| 92 } else if (std::find_if( |
| 93 kLanguagesThatUseAnArabicComma, |
| 94 kLanguagesThatUseAnArabicComma + |
| 95 arraysize(kLanguagesThatUseAnArabicComma), |
| 96 std::bind2nd(EqualToTolowerString(), base_language)) != |
| 97 kLanguagesThatUseAnArabicComma + |
| 98 arraysize(kLanguagesThatUseAnArabicComma)) { |
| 99 return kArabicCommaSeparator; |
| 100 } |
| 101 // Either the language is a latin-script language, or no language was |
| 102 // specified. In the latter case we still return ", " as the most common |
| 103 // separator in use. In countries that don't use this, e.g. Thailand, |
| 104 // addresses are often written in latin script where this would still be |
| 105 // appropriate, so this is a reasonable default in the absence of information. |
| 106 return kCommaSeparator; |
| 107 } |
| 108 |
| 109 void CombineLinesForLanguage( |
| 110 const std::vector<std::string>& lines, const std::string& language_tag, |
| 111 std::string *line) { |
| 112 if (lines.size() > 0) { |
| 113 line->assign(lines[0]); |
| 114 } |
| 115 std::string separator = GetLineSeparatorForLanguage(language_tag); |
| 116 for (std::vector<std::string>::const_iterator it = lines.begin() + 1; |
| 117 it < lines.end(); ++it) { |
| 118 line->append(separator); |
| 119 line->append(*it); |
| 120 } |
| 121 } |
| 122 |
| 123 } // namespace |
| 124 |
| 125 void GetFormattedNationalAddress( |
| 126 const AddressData& address_data, std::vector<std::string>* lines) { |
| 127 assert(lines != NULL); |
| 128 lines->clear(); |
| 129 |
| 130 Rule rule; |
| 131 rule.CopyFrom(Rule::GetDefault()); |
| 132 // TODO: Eventually, we should get the best rule for this country and |
| 133 // language, rather than just for the country. |
| 134 rule.ParseSerializedRule(RegionDataConstants::GetRegionData( |
| 135 address_data.region_code)); |
| 136 |
| 137 Language language(address_data.language_code); |
| 138 |
| 139 // If latinized rules are available and the |language_code| of this address is |
| 140 // explicitly tagged as being Latin, then use the latinized formatting rules. |
| 141 const std::vector<std::vector<FormatElement> >& format = |
| 142 language.has_latin_script && !rule.GetLatinFormat().empty() |
| 143 ? rule.GetLatinFormat() : rule.GetFormat(); |
| 144 |
| 145 lines->push_back(std::string()); |
| 146 for (size_t i = 0; i < format.size(); ++i) { |
| 147 if (!lines->back().empty()) { |
| 148 lines->push_back(std::string()); |
| 149 } |
| 150 for (size_t j = 0; j < format[i].size(); ++j) { |
| 151 const FormatElement& element = format[i][j]; |
| 152 if (element.IsField()) { |
| 153 if (element.field == STREET_ADDRESS) { |
| 154 // The field "street address" represents the street address lines of |
| 155 // an address, so there can be multiple values. |
| 156 if (lines->back().empty()) { |
| 157 lines->pop_back(); |
| 158 } |
| 159 lines->insert(lines->end(), |
| 160 address_data.address_line.begin(), |
| 161 address_data.address_line.end()); |
| 162 } else { |
| 163 lines->back().append(address_data.GetFieldValue(element.field)); |
| 164 } |
| 165 } else { |
| 166 lines->back().append(element.literal); |
| 167 } |
| 168 } |
| 169 } |
| 170 |
| 171 if (lines->back().empty()) { |
| 172 lines->pop_back(); |
| 173 } |
| 174 } |
| 175 |
| 176 void GetFormattedNationalAddressLine( |
| 177 const AddressData& address_data, std::string* line) { |
| 178 std::vector<std::string> address_lines; |
| 179 GetFormattedNationalAddress(address_data, &address_lines); |
| 180 CombineLinesForLanguage(address_lines, address_data.language_code, line); |
| 181 } |
| 182 |
| 183 void GetStreetAddressLinesAsSingleLine( |
| 184 const AddressData& address_data, std::string* line) { |
| 185 CombineLinesForLanguage( |
| 186 address_data.address_line, address_data.language_code, line); |
| 187 } |
| 188 |
| 189 } // namespace addressinput |
| 190 } // namespace i18n |
OLD | NEW |