OLD | NEW |
| (Empty) |
1 // Copyright (C) 2014 Google Inc. | |
2 // | |
3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
4 // you may not use this file except in compliance with the License. | |
5 // You may obtain a copy of the License at | |
6 // | |
7 // http://www.apache.org/licenses/LICENSE-2.0 | |
8 // | |
9 // Unless required by applicable law or agreed to in writing, software | |
10 // distributed under the License is distributed on an "AS IS" BASIS, | |
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 // See the License for the specific language governing permissions and | |
13 // limitations under the License. | |
14 | |
15 #include <libaddressinput/address_formatter.h> | |
16 | |
17 #include <libaddressinput/address_data.h> | |
18 #include <libaddressinput/address_field.h> | |
19 | |
20 #include <algorithm> | |
21 #include <cassert> | |
22 #include <cstddef> | |
23 #include <functional> | |
24 #include <string> | |
25 #include <vector> | |
26 | |
27 #include "language.h" | |
28 #include "region_data_constants.h" | |
29 #include "rule.h" | |
30 #include "util/cctype_tolower_equal.h" | |
31 | |
32 namespace i18n { | |
33 namespace addressinput { | |
34 | |
35 namespace { | |
36 | |
// Strings inserted between address lines when they are joined into one line.

// A single space.
const char kSpaceSeparator[] = " ";

// An ASCII comma followed by a space; also the fallback separator.
const char kCommaSeparator[] = ", ";

// The bytes 0xD8 0x8C (the Arabic comma, i.e. "، " once the space is added)
// followed by a space.
const char kArabicCommaSeparator[] =
    "\xD8\x8C"
    " ";
40 | |
// Base language codes for which GetLineSeparatorForLanguage() answers with
// kSpaceSeparator.
const char* kLanguagesThatUseSpace[] = { "th", "ko" };
45 | |
// Base language codes for which GetLineSeparatorForLanguage() answers with an
// empty separator. "zh" covers all Chinese variants.
const char* kLanguagesThatHaveNoSeparator[] = { "ja", "zh" };
50 | |
// Base language codes for which GetLineSeparatorForLanguage() answers with
// kArabicCommaSeparator. The list is derived from CLDR: languages in official
// use in some country for which Arabic is the most likely script tag.
// TODO: Consider supporting variants such as tr-Arab by detecting the script
// code.
const char* kLanguagesThatUseAnArabicComma[] = {
  "ar", "az", "fa", "kk", "ku", "ky",
  "ps", "tg", "tk", "ur", "uz"
};
68 | |
69 std::string GetLineSeparatorForLanguage(const std::string& language_tag) { | |
70 Language address_language(language_tag); | |
71 | |
72 // First deal with explicit script tags. | |
73 if (address_language.has_latin_script) { | |
74 return kCommaSeparator; | |
75 } | |
76 | |
77 // Now guess something appropriate based on the base language. | |
78 const std::string& base_language = address_language.base; | |
79 if (std::find_if(kLanguagesThatUseSpace, | |
80 kLanguagesThatUseSpace + arraysize(kLanguagesThatUseSpace), | |
81 std::bind2nd(EqualToTolowerString(), base_language)) != | |
82 kLanguagesThatUseSpace + arraysize(kLanguagesThatUseSpace)) { | |
83 return kSpaceSeparator; | |
84 } else if (std::find_if( | |
85 kLanguagesThatHaveNoSeparator, | |
86 kLanguagesThatHaveNoSeparator + | |
87 arraysize(kLanguagesThatHaveNoSeparator), | |
88 std::bind2nd(EqualToTolowerString(), base_language)) != | |
89 kLanguagesThatHaveNoSeparator + | |
90 arraysize(kLanguagesThatHaveNoSeparator)) { | |
91 return ""; | |
92 } else if (std::find_if( | |
93 kLanguagesThatUseAnArabicComma, | |
94 kLanguagesThatUseAnArabicComma + | |
95 arraysize(kLanguagesThatUseAnArabicComma), | |
96 std::bind2nd(EqualToTolowerString(), base_language)) != | |
97 kLanguagesThatUseAnArabicComma + | |
98 arraysize(kLanguagesThatUseAnArabicComma)) { | |
99 return kArabicCommaSeparator; | |
100 } | |
101 // Either the language is a latin-script language, or no language was | |
102 // specified. In the latter case we still return ", " as the most common | |
103 // separator in use. In countries that don't use this, e.g. Thailand, | |
104 // addresses are often written in latin script where this would still be | |
105 // appropriate, so this is a reasonable default in the absence of information. | |
106 return kCommaSeparator; | |
107 } | |
108 | |
109 void CombineLinesForLanguage( | |
110 const std::vector<std::string>& lines, const std::string& language_tag, | |
111 std::string *line) { | |
112 if (lines.size() > 0) { | |
113 line->assign(lines[0]); | |
114 } | |
115 std::string separator = GetLineSeparatorForLanguage(language_tag); | |
116 for (std::vector<std::string>::const_iterator it = lines.begin() + 1; | |
117 it < lines.end(); ++it) { | |
118 line->append(separator); | |
119 line->append(*it); | |
120 } | |
121 } | |
122 | |
123 } // namespace | |
124 | |
125 void GetFormattedNationalAddress( | |
126 const AddressData& address_data, std::vector<std::string>* lines) { | |
127 assert(lines != NULL); | |
128 lines->clear(); | |
129 | |
130 Rule rule; | |
131 rule.CopyFrom(Rule::GetDefault()); | |
132 // TODO: Eventually, we should get the best rule for this country and | |
133 // language, rather than just for the country. | |
134 rule.ParseSerializedRule(RegionDataConstants::GetRegionData( | |
135 address_data.region_code)); | |
136 | |
137 Language language(address_data.language_code); | |
138 | |
139 // If latinized rules are available and the |language_code| of this address is | |
140 // explicitly tagged as being Latin, then use the latinized formatting rules. | |
141 const std::vector<std::vector<FormatElement> >& format = | |
142 language.has_latin_script && !rule.GetLatinFormat().empty() | |
143 ? rule.GetLatinFormat() : rule.GetFormat(); | |
144 | |
145 lines->push_back(std::string()); | |
146 for (size_t i = 0; i < format.size(); ++i) { | |
147 if (!lines->back().empty()) { | |
148 lines->push_back(std::string()); | |
149 } | |
150 for (size_t j = 0; j < format[i].size(); ++j) { | |
151 const FormatElement& element = format[i][j]; | |
152 if (element.IsField()) { | |
153 if (element.field == STREET_ADDRESS) { | |
154 // The field "street address" represents the street address lines of | |
155 // an address, so there can be multiple values. | |
156 if (lines->back().empty()) { | |
157 lines->pop_back(); | |
158 } | |
159 lines->insert(lines->end(), | |
160 address_data.address_line.begin(), | |
161 address_data.address_line.end()); | |
162 } else { | |
163 lines->back().append(address_data.GetFieldValue(element.field)); | |
164 } | |
165 } else { | |
166 lines->back().append(element.literal); | |
167 } | |
168 } | |
169 } | |
170 | |
171 if (lines->back().empty()) { | |
172 lines->pop_back(); | |
173 } | |
174 } | |
175 | |
176 void GetFormattedNationalAddressLine( | |
177 const AddressData& address_data, std::string* line) { | |
178 std::vector<std::string> address_lines; | |
179 GetFormattedNationalAddress(address_data, &address_lines); | |
180 CombineLinesForLanguage(address_lines, address_data.language_code, line); | |
181 } | |
182 | |
183 void GetStreetAddressLinesAsSingleLine( | |
184 const AddressData& address_data, std::string* line) { | |
185 CombineLinesForLanguage( | |
186 address_data.address_line, address_data.language_code, line); | |
187 } | |
188 | |
189 } // namespace addressinput | |
190 } // namespace i18n | |
OLD | NEW |