third_party/libaddressinput/chromium/cpp/src/address_formatter.cc - Issue 368243007: Reland of "Use address_data.h from upstream libaddressinput".

Side by Side Diff: third_party/libaddressinput/chromium/cpp/src/address_formatter.cc

Issue 368243007: Reland of "Use address_data.h from upstream libaddressinput". (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Update BUILD.gn, fix a test. Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « third_party/libaddressinput/chromium/cpp/src/address_data.cc ('k') | third_party/libaddressinput/chromium/cpp/src/address_metadata.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright (C) 2014 Google Inc.

	2 //

	3 // Licensed under the Apache License, Version 2.0 (the "License");

	4 // you may not use this file except in compliance with the License.

	5 // You may obtain a copy of the License at

	6 //

	7 // http://www.apache.org/licenses/LICENSE-2.0

	8 //

	9 // Unless required by applicable law or agreed to in writing, software

	10 // distributed under the License is distributed on an "AS IS" BASIS,

	11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

	12 // See the License for the specific language governing permissions and

	13 // limitations under the License.

	14

	15 #include <libaddressinput/address_formatter.h>

	16

	17 #include <libaddressinput/address_data.h>

	18 #include <libaddressinput/address_field.h>

	19

	20 #include <algorithm>

	21 #include <cassert>

	22 #include <cstddef>

	23 #include <functional>

	24 #include <string>

	25 #include <vector>

	26

	27 #include "language.h"

	28 #include "region_data_constants.h"

	29 #include "rule.h"

	30 #include "util/cctype_tolower_equal.h"

	31

	32 namespace i18n {

	33 namespace addressinput {

	34

	35 namespace {

	36

	// Separators inserted between address lines when several lines are joined
	// into a single line.
	const char kCommaSeparator[] = ", ";
	const char kSpaceSeparator[] = " ";
	const char kArabicCommaSeparator[] = "\xD8\x8C" " ";  /* "، " */

	// Base languages for which lines are joined with kSpaceSeparator. Both the
	// strings and the table itself are const so that nothing can repoint an
	// entry at runtime.
	const char* const kLanguagesThatUseSpace[] = {
	  "th",
	  "ko"
	};

	// Base languages for which lines are joined with no separator at all.
	const char* const kLanguagesThatHaveNoSeparator[] = {
	  "ja",
	  "zh"  // All Chinese variants.
	};

	// This data is based on CLDR, for languages that are in official use in some
	// country, where Arabic is the most likely script tag.
	// TODO: Consider supporting variants such as tr-Arab by detecting the script
	// code.
	const char* const kLanguagesThatUseAnArabicComma[] = {
	  "ar",
	  "az",
	  "fa",
	  "kk",
	  "ku",
	  "ky",
	  "ps",
	  "tg",
	  "tk",
	  "ur",
	  "uz"
	};

	68

	69 std::string GetLineSeparatorForLanguage(const std::string& language_tag) {

	70 Language address_language(language_tag);

	71

	72 // First deal with explicit script tags.

	73 if (address_language.has_latin_script) {

	74 return kCommaSeparator;

	75 }

	76

	77 // Now guess something appropriate based on the base language.

	78 const std::string& base_language = address_language.base;

	79 if (std::find_if(kLanguagesThatUseSpace,

	80 kLanguagesThatUseSpace + arraysize(kLanguagesThatUseSpace),

	81 std::bind2nd(EqualToTolowerString(), base_language)) !=

	82 kLanguagesThatUseSpace + arraysize(kLanguagesThatUseSpace)) {

	83 return kSpaceSeparator;

	84 } else if (std::find_if(

	85 kLanguagesThatHaveNoSeparator,

	86 kLanguagesThatHaveNoSeparator +

	87 arraysize(kLanguagesThatHaveNoSeparator),

	88 std::bind2nd(EqualToTolowerString(), base_language)) !=

	89 kLanguagesThatHaveNoSeparator +

	90 arraysize(kLanguagesThatHaveNoSeparator)) {

	91 return "";

	92 } else if (std::find_if(

	93 kLanguagesThatUseAnArabicComma,

	94 kLanguagesThatUseAnArabicComma +

	95 arraysize(kLanguagesThatUseAnArabicComma),

	96 std::bind2nd(EqualToTolowerString(), base_language)) !=

	97 kLanguagesThatUseAnArabicComma +

	98 arraysize(kLanguagesThatUseAnArabicComma)) {

	99 return kArabicCommaSeparator;

	100 }

	101 // Either the language is a latin-script language, or no language was

	102 // specified. In the latter case we still return ", " as the most common

	103 // separator in use. In countries that don't use this, e.g. Thailand,

	104 // addresses are often written in latin script where this would still be

	105 // appropriate, so this is a reasonable default in the absence of information.

	106 return kCommaSeparator;

	107 }

	108

	109 void CombineLinesForLanguage(

	110 const std::vector<std::string>& lines, const std::string& language_tag,

	111 std::string *line) {

	112 if (lines.size() > 0) {

	113 line->assign(lines[0]);

	114 }

	115 std::string separator = GetLineSeparatorForLanguage(language_tag);

	116 for (std::vector<std::string>::const_iterator it = lines.begin() + 1;

	117 it < lines.end(); ++it) {

	118 line->append(separator);

	119 line->append(*it);

	120 }

	121 }

	122

	123 } // namespace

	124

	125 void GetFormattedNationalAddress(

	126 const AddressData& address_data, std::vector<std::string>* lines) {

	127 assert(lines != NULL);

	128 lines->clear();

	129

	130 Rule rule;

	131 rule.CopyFrom(Rule::GetDefault());

	132 // TODO: Eventually, we should get the best rule for this country and

	133 // language, rather than just for the country.

	134 rule.ParseSerializedRule(RegionDataConstants::GetRegionData(

	135 address_data.region_code));

	136

	137 Language language(address_data.language_code);

	138

	139 // If latinized rules are available and the \|language_code\| of this address is

	140 // explicitly tagged as being Latin, then use the latinized formatting rules.

	141 const std::vector<std::vector<FormatElement> >& format =

	142 language.has_latin_script && !rule.GetLatinFormat().empty()

	143 ? rule.GetLatinFormat() : rule.GetFormat();

	144

	145 lines->push_back(std::string());

	146 for (size_t i = 0; i < format.size(); ++i) {

	147 if (!lines->back().empty()) {

	148 lines->push_back(std::string());

	149 }

	150 for (size_t j = 0; j < format[i].size(); ++j) {

	151 const FormatElement& element = format[i][j];

	152 if (element.IsField()) {

	153 if (element.field == STREET_ADDRESS) {

	154 // The field "street address" represents the street address lines of

	155 // an address, so there can be multiple values.

	156 if (lines->back().empty()) {

	157 lines->pop_back();

	158 }

	159 lines->insert(lines->end(),

	160 address_data.address_line.begin(),

	161 address_data.address_line.end());

	162 } else {

	163 lines->back().append(address_data.GetFieldValue(element.field));

	164 }

	165 } else {

	166 lines->back().append(element.literal);

	167 }

	168 }

	169 }

	170

	171 if (lines->back().empty()) {

	172 lines->pop_back();

	173 }

	174 }

	175

	176 void GetFormattedNationalAddressLine(

	177 const AddressData& address_data, std::string* line) {

	178 std::vector<std::string> address_lines;

	179 GetFormattedNationalAddress(address_data, &address_lines);

	180 CombineLinesForLanguage(address_lines, address_data.language_code, line);

	181 }

	182

	183 void GetStreetAddressLinesAsSingleLine(

	184 const AddressData& address_data, std::string* line) {

	185 CombineLinesForLanguage(

	186 address_data.address_line, address_data.language_code, line);

	187 }

	188

	189 } // namespace addressinput

	190 } // namespace i18n

OLD	NEW