components/autofill/core/browser/autofill_profile_comparator.cc - Issue 2088443002: Expand autofill profile merge logic.

Side by Side Diff: components/autofill/core/browser/autofill_profile_comparator.cc

Issue 2088443002: Expand autofill profile merge logic. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « components/autofill/core/browser/autofill_profile_comparator.h ('k') | components/autofill/core/browser/autofill_profile_comparator_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved.	1 // Copyright 2016 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/autofill/core/browser/autofill_profile_comparator.h"	5 #include "components/autofill/core/browser/autofill_profile_comparator.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <vector>	8 #include <vector>

9	9

	10 #include "base/i18n/case_conversion.h"

10 #include "base/i18n/char_iterator.h"	11 #include "base/i18n/char_iterator.h"

	12 #include "base/strings/string_piece.h"

11 #include "base/strings/string_split.h"	13 #include "base/strings/string_split.h"

12 #include "base/strings/string_util.h"	14 #include "base/strings/string_util.h"

13 #include "base/strings/utf_string_conversion_utils.h"	15 #include "base/strings/utf_string_conversion_utils.h"

14 #include "base/strings/utf_string_conversions.h"	16 #include "base/strings/utf_string_conversions.h"

15 #include "components/autofill/core/browser/autofill_data_util.h"	17 #include "components/autofill/core/browser/autofill_data_util.h"

16 #include "third_party/libphonenumber/phonenumber_api.h"	18 #include "third_party/libphonenumber/phonenumber_api.h"

17	19

18 namespace autofill {	20 namespace autofill {

19 namespace {	21 namespace {

20	22

21 const base::char16 kSpace[] = {L' ', L'\0'};	23 const base::char16 kSpace[] = {L' ', L'\0'};

22	24

	25 bool ContainsNewline(base::StringPiece16 text) {

	26 return text.find('\n') != base::StringPiece16::npos;

	27 }

	28

23 } // namespace	29 } // namespace

24	30

25 AutofillProfileComparator::AutofillProfileComparator(	31 AutofillProfileComparator::AutofillProfileComparator(

26 const base::StringPiece& app_locale)	32 const base::StringPiece& app_locale)

27 : app_locale_(app_locale.data(), app_locale.size()) {	33 : app_locale_(app_locale.data(), app_locale.size()) {

28 // Use ICU transliteration to remove diacritics and fold case.	34 // Use ICU transliteration to remove diacritics and fold case.

29 // See http://userguide.icu-project.org/transforms/general	35 // See http://userguide.icu-project.org/transforms/general

30 UErrorCode status = U_ZERO_ERROR;	36 UErrorCode status = U_ZERO_ERROR;

31 std::unique_ptr<icu::Transliterator> transliterator(	37 std::unique_ptr<icu::Transliterator> transliterator(

32 icu::Transliterator::createInstance(	38 icu::Transliterator::createInstance(

(...skipping 73 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
106 bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1,	112 bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1,

107 const AutofillProfile& p2) const {	113 const AutofillProfile& p2) const {

108 // Sorted in order of relative expense of the tests to fail early and cheaply	114 // Sorted in order of relative expense of the tests to fail early and cheaply

109 // if possible.	115 // if possible.

110 return HaveMergeableEmailAddresses(p1, p2) &&	116 return HaveMergeableEmailAddresses(p1, p2) &&

111 HaveMergeableCompanyNames(p1, p2) &&	117 HaveMergeableCompanyNames(p1, p2) &&

112 HaveMergeablePhoneNumbers(p1, p2) && HaveMergeableNames(p1, p2) &&	118 HaveMergeablePhoneNumbers(p1, p2) && HaveMergeableNames(p1, p2) &&

113 HaveMergeableAddresses(p1, p2);	119 HaveMergeableAddresses(p1, p2);

114 }	120 }

115	121

	122 bool AutofillProfileComparator::MergeNames(const AutofillProfile& p1,

	123 const AutofillProfile& p2,

	124 NameInfo* name_info) const {

	125 DCHECK(HaveMergeableNames(p1, p2));

	126

	127 const AutofillType kFullName(NAME_FULL);

	128 const base::string16& full_name_1 = p1.GetInfo(kFullName, app_locale_);

	129 const base::string16& full_name_2 = p2.GetInfo(kFullName, app_locale_);

	130 const base::string16& normalized_full_name_1 =

	131 NormalizeForComparison(full_name_1);

	132 const base::string16& normalized_full_name_2 =

	133 NormalizeForComparison(full_name_2);

	134

	135 const base::string16* best_name = nullptr;

	136 if (normalized_full_name_1.empty()) {

	137 // p1 has no name, so use the name from p2.

	138 best_name = &full_name_2;

	139 } else if (normalized_full_name_2.empty()) {

	140 // p2 has no name, so use the name from p1.

	141 best_name = &full_name_1;

	142 } else if (IsNameVariantOf(normalized_full_name_1, normalized_full_name_2)) {

	143 // full_name_2 is a variant of full_name_1.

	144 best_name = &full_name_1;

	145 } else {

	146 // If the assertion that p1 and p2 have mergeable names is true, then

	147 // full_name_1 must be a name variant of full_name_2;

	148 best_name = &full_name_2;

	149 }

	150

	151 name_info->SetInfo(AutofillType(NAME_FULL), *best_name, app_locale_);

	152 return true;

	153 }

	154

	155 bool AutofillProfileComparator::MergeAddresses(const AutofillProfile& p1,

	156 const AutofillProfile& p2,

	157 Address* address) const {

	158 DCHECK(HaveMergeableAddresses(p1, p2));

	159

	160 // One of the countries is empty or they are the same modulo case, so we just

	161 // have to find the non-empty one, if any.

	162 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);

	163 const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_);

	164 address->SetInfo(

	165 kCountryCode,

	166 base::i18n::ToUpper(

	167 country1.empty() ? country1 : p2.GetInfo(kCountryCode, app_locale_)),

	168 app_locale_);

	169

	170 // One of the zip codes is empty, they are the same, or one is a substring

	171 // of the other. So, we have to find the longest one.

	172 const AutofillType kZipCode(ADDRESS_HOME_ZIP);

	173 const base::string16& zip1 = p1.GetInfo(kZipCode, app_locale_);

	174 const base::string16& zip2 = p2.GetInfo(kZipCode, app_locale_);

	175 address->SetInfo(kZipCode, (zip1.size() > zip2.size() ? zip1 : zip2),

	176 app_locale_);

	177

	178 // One of the states is empty or one of the states has a subset of tokens from

	179 // the other. Pick the non-empty state that is shorter. This is usually the

	180 // abbreviated one.

	181 const AutofillType kState(ADDRESS_HOME_STATE);

	182 const base::string16& state1 = p1.GetInfo(kState, app_locale_);

	183 const base::string16& state2 = p2.GetInfo(kState, app_locale_);

	184 if (state1.empty()) {

	185 address->SetInfo(kState, state2, app_locale_);

	186 } else if (state2.empty()) {

	187 address->SetInfo(kState, state1, app_locale_);

	188 } else {

	189 address->SetInfo(kState, (state1.size() < state2.size() ? state1 : state2),

	190 app_locale_);

	191 }

	192

	193 // One of the cities is empty or one of the cities has a subset of tokens from

	194 // the other. Pick the non-empty city that is shorter. This is usually the

	195 // abbreviated one.

	196 const AutofillType kCity(ADDRESS_HOME_STATE);

	197 const base::string16& city1 = p1.GetInfo(kCity, app_locale_);

	198 const base::string16& city2 = p2.GetInfo(kCity, app_locale_);

	199 if (city1.empty()) {

	200 address->SetInfo(kCity, city2, app_locale_);

	201 } else if (city2.empty()) {

	202 address->SetInfo(kCity, city1, app_locale_);

	203 } else {

	204 address->SetInfo(kCity, (city1.size() < city2.size() ? city1 : city2),

	205 app_locale_);

	206 }

	207

	208 // One of the addresses is empty or one of the addresses has a subset of

	209 // tokens from the other. Pick the non-em that is shorter. This is usually the

	210 // abbreviated one.

	211 const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS);

	212 const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_);

	213 const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_);

	214 // If one of the addresses is empty then use the other.

	215 if (address1.empty()) {

	216 address->SetInfo(kStreetAddress, address2, app_locale_);

	217 } else if (address2.empty()) {

	218 address->SetInfo(kStreetAddress, address1, app_locale_);

	219 } else {

	220 // Prefer the multi-line address if one is multi-line and the other isn't.

	221 bool address1_multiline = ContainsNewline(address1);

	222 bool address2_multiline = ContainsNewline(address2);

	223 if (address1_multiline && !address2_multiline) {

	224 address->SetInfo(kStreetAddress, address1, app_locale_);

	225 } else if (address2_multiline && !address1_multiline) {

	226 address->SetInfo(kStreetAddress, address2, app_locale_);

	227 } else {

	228 // Prefer the one with more tokens if they're both single-line or both

	229 // multi-line addresses.

	230 int result = CompareTokens(NormalizeForComparison(address1),

	231 NormalizeForComparison(address2));

	232 switch (result) {

	233 case 0:
	Mathieu 2016/06/21 18:55:07 use enum? use enum? Roger McFarlane (Chromium) 2016/06/23 18:27:38 Done. Show quoted text On 2016/06/21 18:55:07, Mathieu Perreault wrote: > use enum? Done.
	234 // They have the same set of unique tokens. Let's pick the one that's

	235 // longer.

	236 address->SetInfo(

	237 kStreetAddress,

	238 (address1.size() > address2.size() ? address1 : address2),

	239 app_locale_);

	240 break;

	241 case 1:

	242 // address1 has more unique tokens than address2.

	243 address->SetInfo(kStreetAddress, address1, app_locale_);

	244 break;

	245 case 2:

	246 // address2 has more unique tokens than address1.

	247 address->SetInfo(kStreetAddress, address1, app_locale_);

	248 break;

	249 default:

	250 // The addresses aren't mergeable and we shouldn't be doing any of

	251 // this.

	252 NOTREACHED();

	253 return false;

	254 }

	255 }

	256 }

	257 return true;

	258 }

	259

116 // static	260 // static

117 std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens(	261 std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens(

118 base::StringPiece16 s) {	262 base::StringPiece16 s) {

119 std::vector<base::StringPiece16> tokens = base::SplitStringPiece(	263 std::vector<base::StringPiece16> tokens = base::SplitStringPiece(

120 s, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);	264 s, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

121 return std::set<base::StringPiece16>(tokens.begin(), tokens.end());	265 return std::set<base::StringPiece16>(tokens.begin(), tokens.end());

122 }	266 }

123	267

124 // static	268 // static

125 bool AutofillProfileComparator::HaveSameTokens(base::StringPiece16 s1,	269 AutofillProfileComparator::CompareTokensResult

126 base::StringPiece16 s2) {	270 AutofillProfileComparator::CompareTokens(base::StringPiece16 s1,

	271 base::StringPiece16 s2) {

	272 // Note: std::includes() expects the items in each range to be in sorted order,

	273 // hence the use of std::set<> instead of std::unordered_set<>.

127 std::set<base::StringPiece16> t1 = UniqueTokens(s1);	274 std::set<base::StringPiece16> t1 = UniqueTokens(s1);

128 std::set<base::StringPiece16> t2 = UniqueTokens(s2);	275 std::set<base::StringPiece16> t2 = UniqueTokens(s2);

129	276

130 // Note: std::includes() expects the items in each range to be in sorted order,	277 // Does s1 contain all of the tokens in s2? As a special case, return

131 // hence the use of std::set<> instead of std::unordered_set<>.	278 // SAME_TOKENS if the two sets are exactly the same.

132 return std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()) \|\|	279 if (std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()))

133 std::includes(t2.begin(), t2.end(), t1.begin(), t1.end());	280 return t1.size() == t2.size() ? SAME_TOKENS : S1_CONTAINS_S2;

	281

	282 // Does s2 contain all of the tokens in s1?

	283 if (std::includes(t2.begin(), t2.end(), t1.begin(), t1.end()))

	284 return S2_CONTAINS_S1;

	285

	286 // Neither string contains all of the tokens from the other.

	287 return DIFFERENT_TOKENS;

134 }	288 }

135	289

136 // static	290 // static

137 std::set<base::string16> AutofillProfileComparator::GetNamePartVariants(	291 std::set<base::string16> AutofillProfileComparator::GetNamePartVariants(

138 const base::string16& name_part) {	292 const base::string16& name_part) {

139 const size_t kMaxSupportedSubNames = 8;	293 const size_t kMaxSupportedSubNames = 8;

140	294

141 std::vector<base::string16> sub_names = base::SplitString(	295 std::vector<base::string16> sub_names = base::SplitString(

142 name_part, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);	296 name_part, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

143	297

144 // Limit the number of sub-names we support (to constrain memory usage);	298 // Limit the number of sub-names we support (to constrain memory usage);

145 if (sub_names.size() > kMaxSupportedSubNames)	299 if (sub_names.size() > kMaxSupportedSubNames)

146 return {name_part};	300 return {name_part};

147	301

148 // Start with the empty string as a variant.	302 // Start with the empty string as a variant.

149 std::set<base::string16> variants = {base::EmptyString16()};	303 std::set<base::string16> variants = {base::EmptyString16()};

150	304

151 // For each sub-name, add a variant of all the already existing variants that	305 // For each sub-name, add a variant of all the already existing variants that

152 // appends this sub-name and one that appends the initial of this sub-name.	306 // appends this sub-name and one that appends the initial of this sub-name.

153 // Duplicates will be discarded when they're added to the variants set.	307 // Duplicates will be discarded when they're added to the variants set.

154 for (const base::string16& sub_name : sub_names) {	308 for (const base::string16& sub_name : sub_names) {

155 if (sub_name.empty()) continue;	309 if (sub_name.empty())

	310 continue;

156 std::vector<base::string16> new_variants;	311 std::vector<base::string16> new_variants;

157 for (const base::string16& variant : variants) {	312 for (const base::string16& variant : variants) {

158 new_variants.push_back(base::CollapseWhitespace(	313 new_variants.push_back(base::CollapseWhitespace(

159 base::JoinString({variant, sub_name}, kSpace), true));	314 base::JoinString({variant, sub_name}, kSpace), true));

160 new_variants.push_back(base::CollapseWhitespace(	315 new_variants.push_back(base::CollapseWhitespace(

161 base::JoinString({variant, sub_name.substr(0, 1)}, kSpace), true));	316 base::JoinString({variant, sub_name.substr(0, 1)}, kSpace), true));

162 }	317 }

163 variants.insert(new_variants.begin(), new_variants.end());	318 variants.insert(new_variants.begin(), new_variants.end());

164 }	319 }

165	320

166 // As a common case, also add the variant that just concatenates all of the	321 // As a common case, also add the variant that just concatenates all of the

167 // initials.	322 // initials.

168 base::string16 initials;	323 base::string16 initials;

169 for (const base::string16& sub_name : sub_names) {	324 for (const base::string16& sub_name : sub_names) {

170 if (sub_name.empty()) continue;	325 if (sub_name.empty())

	326 continue;

171 initials.push_back(sub_name[0]);	327 initials.push_back(sub_name[0]);

172 }	328 }

173 variants.insert(initials);	329 variants.insert(initials);

174	330

175 // And, we're done.	331 // And, we're done.

176 return variants;	332 return variants;

177 }	333 }

178	334

179 bool AutofillProfileComparator::IsNameVariantOf(	335 bool AutofillProfileComparator::IsNameVariantOf(

180 const base::string16& full_name_1,	336 const base::string16& full_name_1,

(...skipping 65 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
246 }	402 }

247	403

248 bool AutofillProfileComparator::HaveMergeableCompanyNames(	404 bool AutofillProfileComparator::HaveMergeableCompanyNames(

249 const AutofillProfile& p1,	405 const AutofillProfile& p1,

250 const AutofillProfile& p2) const {	406 const AutofillProfile& p2) const {

251 const base::string16& company_name_1 = NormalizeForComparison(	407 const base::string16& company_name_1 = NormalizeForComparison(

252 p1.GetInfo(AutofillType(COMPANY_NAME), app_locale_));	408 p1.GetInfo(AutofillType(COMPANY_NAME), app_locale_));

253 const base::string16& company_name_2 = NormalizeForComparison(	409 const base::string16& company_name_2 = NormalizeForComparison(

254 p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_));	410 p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_));

255 return company_name_1.empty() \|\| company_name_2.empty() \|\|	411 return company_name_1.empty() \|\| company_name_2.empty() \|\|

256 HaveSameTokens(company_name_1, company_name_2);	412 CompareTokens(company_name_1, company_name_2) != DIFFERENT_TOKENS;

257 }	413 }

258	414

259 bool AutofillProfileComparator::HaveMergeablePhoneNumbers(	415 bool AutofillProfileComparator::HaveMergeablePhoneNumbers(

260 const AutofillProfile& p1,	416 const AutofillProfile& p1,

261 const AutofillProfile& p2) const {	417 const AutofillProfile& p2) const {

262 // We work with the raw phone numbers to avoid losing any helpful information	418 // We work with the raw phone numbers to avoid losing any helpful information

263 // as we parse.	419 // as we parse.

264 const base::string16& raw_phone_1 = p1.GetRawInfo(PHONE_HOME_WHOLE_NUMBER);	420 const base::string16& raw_phone_1 = p1.GetRawInfo(PHONE_HOME_WHOLE_NUMBER);

265 const base::string16& raw_phone_2 = p2.GetRawInfo(PHONE_HOME_WHOLE_NUMBER);	421 const base::string16& raw_phone_2 = p2.GetRawInfo(PHONE_HOME_WHOLE_NUMBER);

266	422

(...skipping 26 matching lines...) Expand all Loading...
293	449

294 NOTREACHED();	450 NOTREACHED();

295 return false;	451 return false;

296 }	452 }

297	453

298 bool AutofillProfileComparator::HaveMergeableAddresses(	454 bool AutofillProfileComparator::HaveMergeableAddresses(

299 const AutofillProfile& p1,	455 const AutofillProfile& p1,

300 const AutofillProfile& p2) const {	456 const AutofillProfile& p2) const {

301 // If the addresses are not in the same country, then they're not the same. If	457 // If the addresses are not in the same country, then they're not the same. If

302 // one of the address countries is unknown/invalid the comparison continues.	458 // one of the address countries is unknown/invalid the comparison continues.

303 const base::string16& country1 = p1.GetInfo(	459 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);

304 AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_);	460 const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_);

305 const base::string16& country2 = p2.GetInfo(	461 const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_);

306 AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_);

307 if (!country1.empty() && !country2.empty() &&	462 if (!country1.empty() && !country2.empty() &&

308 !case_insensitive_compare_.StringsEqual(country1, country2)) {	463 !case_insensitive_compare_.StringsEqual(country1, country2)) {

309 return false;	464 return false;

310 }	465 }

311	466

312 // TODO(rogerm): Lookup the normalization rules for the (common) country of	467 // TODO(rogerm): Lookup the normalization rules for the (common) country of

313 // the address. The rules should be applied post NormalizeForComparison to	468 // the address. The rules should be applied post NormalizeForComparison to

314 // the state, city, and address bag of words comparisons.	469 // the state, city, and address bag of words comparisons.

315	470

316 // Zip	471 // Zip

317 // ----	472 // ----

318 // If the addresses are definitely not in the same zip/area code then we're	473 // If the addresses are definitely not in the same zip/area code then we're

319 // done. Otherwise, the comparison continues.	474 // done. Otherwise, the comparison continues.

	475 const AutofillType kZipCode(ADDRESS_HOME_ZIP);

320 const base::string16& zip1 = NormalizeForComparison(	476 const base::string16& zip1 = NormalizeForComparison(

321 p1.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_),	477 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);

322 DISCARD_WHITESPACE);

323 const base::string16& zip2 = NormalizeForComparison(	478 const base::string16& zip2 = NormalizeForComparison(

324 p2.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_),	479 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);

325 DISCARD_WHITESPACE);

326 if (!zip1.empty() && !zip2.empty() &&	480 if (!zip1.empty() && !zip2.empty() &&

327 zip1.find(zip2) == base::string16::npos &&	481 zip1.find(zip2) == base::string16::npos &&

328 zip2.find(zip1) == base::string16::npos) {	482 zip2.find(zip1) == base::string16::npos) {

329 return false;	483 return false;

330 }	484 }

331	485

332 // State	486 // State

333 // ------	487 // ------

334 // Heuristic: If the match is between non-empty zip codes then we can infer	488 // Heuristic: States are mergeable if one is a (possibly empty) bag of words

	489 // subset of the other.

	490 //

	491 // TODO(rogerm): If the match is between non-empty zip codes then we can infer

335 // that the two state strings are intended to have the same meaning. This	492 // that the two state strings are intended to have the same meaning. This

336 // handles the cases where we have invalid or poorly formed data in one of the	493 // handles the cases where we have invalid or poorly formed data in one of the

337 // state values (like "Select one", or "CA - California"). Otherwise, we	494 // state values (like "Select one", or "CA - California").

338 // actually have to check if the states map to the the same set of tokens.	495 const AutofillType kState(ADDRESS_HOME_STATE);

339 const base::string16& state1 = NormalizeForComparison(	496 const base::string16& state1 =

340 p1.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_));	497 NormalizeForComparison(p1.GetInfo(kState, app_locale_));

341 const base::string16& state2 = NormalizeForComparison(	498 const base::string16& state2 =

342 p2.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_));	499 NormalizeForComparison(p2.GetInfo(kState, app_locale_));

343 if ((zip1.empty() \|\| zip2.empty()) && !HaveSameTokens(state1, state2)) {	500 if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) {

344 return false;	501 return false;

345 }	502 }

346	503

347 // City	504 // City

348 // ------	505 // ------

349 // Heuristic: If the match is between non-empty zip codes then we can infer	506 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words

	507 // subset of the other.

	508 //

	509 // TODO(rogerm): If the match is between non-empty zip codes then we can infer

350 // that the two city strings are intended to have the same meaning. This	510 // that the two city strings are intended to have the same meaning. This

351 // handles the cases where we have a city vs one of its suburbs. Otherwise, we	511 // handles the cases where we have a city vs one of its suburbs.

352 // actually have to check if the cities map to the the same set of tokens.

353 const base::string16& city1 = NormalizeForComparison(	512 const base::string16& city1 = NormalizeForComparison(

354 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));	513 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));

355 const base::string16& city2 = NormalizeForComparison(	514 const base::string16& city2 = NormalizeForComparison(

356 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));	515 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));

357 if ((zip1.empty() \|\| zip2.empty()) && !HaveSameTokens(city1, city2)) {	516 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) {

358 return false;	517 return false;

359 }	518 }

360	519

361 // Address	520 // Address

362 // --------	521 // --------

363 // Heuristic: Use bag of words comparison on the post-normalized addresses.	522 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag

	523 // of words subset of the other.

364 const base::string16& address1 = NormalizeForComparison(	524 const base::string16& address1 = NormalizeForComparison(

365 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));	525 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));

366 const base::string16& address2 = NormalizeForComparison(	526 const base::string16& address2 = NormalizeForComparison(

367 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));	527 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));

368 if (!HaveSameTokens(address1, address2)) {	528 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) {

369 return false;	529 return false;

370 }	530 }

371	531

372 return true;	532 return true;

373 }	533 }

374	534

375 } // namespace autofill	535 } // namespace autofill

OLD	NEW