chrome/browser/autocomplete/url_index_private_data.cc - Issue 976423002: Remove ScoreHistoryMatch::Builder

Side by Side Diff: chrome/browser/autocomplete/url_index_private_data.cc

Issue 976423002: Remove ScoreHistoryMatch::Builder (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@cpplint

Patch Set: Fix unit tests Created 5 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/autocomplete/url_index_private_data.h"	5 #include "chrome/browser/autocomplete/url_index_private_data.h"

6	6

7 #include <functional>	7 #include <functional>

8 #include <iterator>	8 #include <iterator>

9 #include <limits>	9 #include <limits>

10 #include <numeric>	10 #include <numeric>

11 #include <string>	11 #include <string>

12 #include <vector>	12 #include <vector>

13	13

14 #include "base/basictypes.h"	14 #include "base/basictypes.h"

15 #include "base/files/file_util.h"	15 #include "base/files/file_util.h"

16 #include "base/i18n/break_iterator.h"	16 #include "base/i18n/break_iterator.h"

17 #include "base/i18n/case_conversion.h"	17 #include "base/i18n/case_conversion.h"

18 #include "base/metrics/histogram.h"	18 #include "base/metrics/histogram.h"

19 #include "base/strings/string_util.h"	19 #include "base/strings/string_util.h"

20 #include "base/strings/utf_string_conversions.h"	20 #include "base/strings/utf_string_conversions.h"

21 #include "base/time/time.h"	21 #include "base/time/time.h"

22 #include "chrome/browser/autocomplete/in_memory_url_index.h"	22 #include "chrome/browser/autocomplete/in_memory_url_index.h"

	23 #include "components/bookmarks/browser/bookmark_model.h"

23 #include "components/bookmarks/browser/bookmark_utils.h"	24 #include "components/bookmarks/browser/bookmark_utils.h"

24 #include "components/history/core/browser/history_database.h"	25 #include "components/history/core/browser/history_database.h"

25 #include "components/history/core/browser/history_db_task.h"	26 #include "components/history/core/browser/history_db_task.h"

26 #include "components/history/core/browser/history_service.h"	27 #include "components/history/core/browser/history_service.h"

27 #include "net/base/net_util.h"	28 #include "net/base/net_util.h"

28	29

29 #if defined(USE_SYSTEM_PROTOBUF)	30 #if defined(USE_SYSTEM_PROTOBUF)

30 #include <google/protobuf/repeated_field.h>	31 #include <google/protobuf/repeated_field.h>

31 #else	32 #else

32 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"	33 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"

33 #endif	34 #endif

34	35

35 using google::protobuf::RepeatedField;	36 using google::protobuf::RepeatedField;

36 using google::protobuf::RepeatedPtrField;	37 using google::protobuf::RepeatedPtrField;

37 using in_memory_url_index::InMemoryURLIndexCacheItem;	38 using in_memory_url_index::InMemoryURLIndexCacheItem;

38	39

39 namespace {	40 namespace {

40 static const size_t kMaxVisitsToStoreInCache = 10u;	41 static const size_t kMaxVisitsToStoreInCache = 10u;

41 } // anonymous namespace	42 }

42	43

43 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem	44 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem

44 WordListItem;	45 WordListItem;

45 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry	46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry

46 WordMapEntry;	47 WordMapEntry;

47 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;	48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;

48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem	49 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem

49 CharWordMapItem;	50 CharWordMapItem;

50 typedef in_memory_url_index::	51 typedef in_memory_url_index::

51 InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry CharWordMapEntry;	52 InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry CharWordMapEntry;

(...skipping 93 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
145 pre_filter_item_count_(0),	146 pre_filter_item_count_(0),

146 post_filter_item_count_(0),	147 post_filter_item_count_(0),

147 post_scoring_item_count_(0) {	148 post_scoring_item_count_(0) {

148 }	149 }

149	150

150 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(	151 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

151 base::string16 search_string,	152 base::string16 search_string,

152 size_t cursor_position,	153 size_t cursor_position,

153 size_t max_matches,	154 size_t max_matches,

154 const std::string& languages,	155 const std::string& languages,

155 const ScoredHistoryMatch::Builder& builder) {	156 bookmarks::BookmarkModel* bookmark_model) {

156 // If cursor position is set and useful (not at either end of the	157 // If cursor position is set and useful (not at either end of the

157 // string), allow the search string to be broken at cursor position.	158 // string), allow the search string to be broken at cursor position.

158 // We do this by pretending there's a space where the cursor is.	159 // We do this by pretending there's a space where the cursor is.

159 if ((cursor_position != base::string16::npos) &&	160 if ((cursor_position != base::string16::npos) &&

160 (cursor_position < search_string.length()) &&	161 (cursor_position < search_string.length()) &&

161 (cursor_position > 0)) {	162 (cursor_position > 0)) {

162 search_string.insert(cursor_position, base::ASCIIToUTF16(" "));	163 search_string.insert(cursor_position, base::ASCIIToUTF16(" "));

163 }	164 }

164 pre_filter_item_count_ = 0;	165 pre_filter_item_count_ = 0;

165 post_filter_item_count_ = 0;	166 post_filter_item_count_ = 0;

(...skipping 73 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
239 // for in the database allows some whitespace "words" whereas Tokenize	240 // for in the database allows some whitespace "words" whereas Tokenize

240 // excludes a long list of whitespace.) One could write a scoring	241 // excludes a long list of whitespace.) One could write a scoring

241 // function that gives a reasonable order to matches when there	242 // function that gives a reasonable order to matches when there

242 // are no terms (i.e., all the words are some form of whitespace),	243 // are no terms (i.e., all the words are some form of whitespace),

243 // but this is such a rare edge case that it's not worth the time.	244 // but this is such a rare edge case that it's not worth the time.

244 return scored_items;	245 return scored_items;

245 }	246 }

246 scored_items =	247 scored_items =

247 std::for_each(	248 std::for_each(

248 history_id_set.begin(), history_id_set.end(),	249 history_id_set.begin(), history_id_set.end(),

249 AddHistoryMatch(*this, languages, lower_raw_string, lower_raw_terms,	250 AddHistoryMatch(bookmark_model, *this, languages, lower_raw_string,

250 base::Time::Now(), builder)).ScoredMatches();	251 lower_raw_terms, base::Time::Now())).ScoredMatches();

251	252

252 // Select and sort only the top \|max_matches\| results.	253 // Select and sort only the top \|max_matches\| results.

253 if (scored_items.size() > max_matches) {	254 if (scored_items.size() > max_matches) {

254 std::partial_sort(scored_items.begin(),	255 std::partial_sort(scored_items.begin(),

255 scored_items.begin() +	256 scored_items.begin() +

256 max_matches,	257 max_matches,

257 scored_items.end(),	258 scored_items.end(),

258 ScoredHistoryMatch::MatchScoreGreater);	259 ScoredHistoryMatch::MatchScoreGreater);

259 scored_items.resize(max_matches);	260 scored_items.resize(max_matches);

260 } else {	261 } else {

(...skipping 986 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1247 const std::set<std::string>& whitelist) {	1248 const std::set<std::string>& whitelist) {

1248 return whitelist.find(gurl.scheme()) != whitelist.end();	1249 return whitelist.find(gurl.scheme()) != whitelist.end();

1249 }	1250 }

1250	1251

1251	1252

1252 // SearchTermCacheItem ---------------------------------------------------------	1253 // SearchTermCacheItem ---------------------------------------------------------

1253	1254

1254 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(	1255 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(

1255 const WordIDSet& word_id_set,	1256 const WordIDSet& word_id_set,

1256 const HistoryIDSet& history_id_set)	1257 const HistoryIDSet& history_id_set)

1257 : word_id_set_(word_id_set),	1258 : word_id_set_(word_id_set), history_id_set_(history_id_set), used_(true) {

1258 history_id_set_(history_id_set),	1259 }

1259 used_(true) {}

1260	1260

1261 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem()	1261 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem() : used_(true) {

1262 : used_(true) {}	1262 }

1263	1263

1264 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {}	1264 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {

1265	1265 }

1266	1266

1267 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------	1267 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------

1268	1268

1269 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(	1269 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(

	1270 bookmarks::BookmarkModel* bookmark_model,

1270 const URLIndexPrivateData& private_data,	1271 const URLIndexPrivateData& private_data,

1271 const std::string& languages,	1272 const std::string& languages,

1272 const base::string16& lower_string,	1273 const base::string16& lower_string,

1273 const String16Vector& lower_terms,	1274 const String16Vector& lower_terms,

1274 const base::Time now,	1275 const base::Time now)

1275 const ScoredHistoryMatch::Builder& builder)	1276 : bookmark_model_(bookmark_model),

1276 : private_data_(private_data),	1277 private_data_(private_data),

1277 languages_(languages),	1278 languages_(languages),

1278 builder_(builder),

1279 lower_string_(lower_string),	1279 lower_string_(lower_string),

1280 lower_terms_(lower_terms),	1280 lower_terms_(lower_terms),

1281 now_(now) {	1281 now_(now) {

1282 // Calculate offsets for each term. For instance, the offset for	1282 // Calculate offsets for each term. For instance, the offset for

1283 // ".net" should be 1, indicating that the actual word-part of the term	1283 // ".net" should be 1, indicating that the actual word-part of the term

1284 // starts at offset 1.	1284 // starts at offset 1.

1285 lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u);	1285 lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u);

1286 for (size_t i = 0; i < lower_terms_.size(); ++i) {	1286 for (size_t i = 0; i < lower_terms_.size(); ++i) {

1287 base::i18n::BreakIterator iter(lower_terms_[i],	1287 base::i18n::BreakIterator iter(lower_terms_[i],

1288 base::i18n::BreakIterator::BREAK_WORD);	1288 base::i18n::BreakIterator::BREAK_WORD);

1289 // If the iterator doesn't work, assume an offset of 0.	1289 // If the iterator doesn't work, assume an offset of 0.

1290 if (!iter.Init())	1290 if (!iter.Init())

1291 continue;	1291 continue;

1292 // Find the first word start.	1292 // Find the first word start.

1293 while (iter.Advance() && !iter.IsWord()) {}	1293 while (iter.Advance() && !iter.IsWord()) {}

1294 if (iter.IsWord())	1294 if (iter.IsWord())

1295 lower_terms_to_word_starts_offsets_[i] = iter.prev();	1295 lower_terms_to_word_starts_offsets_[i] = iter.prev();

1296 // Else: the iterator didn't find a word break. Assume an offset of 0.	1296 // Else: the iterator didn't find a word break. Assume an offset of 0.

1297 }	1297 }

1298 }	1298 }

1299	1299

1300 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}	1300 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {

	1301 }

1301	1302

1302 void URLIndexPrivateData::AddHistoryMatch::operator()(	1303 void URLIndexPrivateData::AddHistoryMatch::operator()(

1303 const HistoryID history_id) {	1304 const HistoryID history_id) {

1304 HistoryInfoMap::const_iterator hist_pos =	1305 HistoryInfoMap::const_iterator hist_pos =

1305 private_data_.history_info_map_.find(history_id);	1306 private_data_.history_info_map_.find(history_id);

1306 if (hist_pos != private_data_.history_info_map_.end()) {	1307 if (hist_pos != private_data_.history_info_map_.end()) {

1307 const history::URLRow& hist_item = hist_pos->second.url_row;	1308 const history::URLRow& hist_item = hist_pos->second.url_row;

1308 const VisitInfoVector& visits = hist_pos->second.visits;	1309 const VisitInfoVector& visits = hist_pos->second.visits;

1309 WordStartsMap::const_iterator starts_pos =	1310 WordStartsMap::const_iterator starts_pos =

1310 private_data_.word_starts_map_.find(history_id);	1311 private_data_.word_starts_map_.find(history_id);

1311 DCHECK(starts_pos != private_data_.word_starts_map_.end());	1312 DCHECK(starts_pos != private_data_.word_starts_map_.end());

1312 ScoredHistoryMatch match = builder_.Build(	1313 ScoredHistoryMatch match(

1313 hist_item, visits, languages_, lower_string_, lower_terms_,	1314 hist_item, visits, languages_, lower_string_, lower_terms_,

1314 lower_terms_to_word_starts_offsets_, starts_pos->second, now_);	1315 lower_terms_to_word_starts_offsets_, starts_pos->second,

	1316 bookmark_model_ && bookmark_model_->IsBookmarked(hist_item.url()),

	1317 now_);

1315 if (match.raw_score > 0)	1318 if (match.raw_score > 0)

1316 scored_matches_.push_back(match);	1319 scored_matches_.push_back(match);

1317 }	1320 }

1318 }	1321 }

1319	1322

1320	1323

1321 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------	1324 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------

1322	1325

1323 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(	1326 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(

1324 const HistoryInfoMap& history_info_map)	1327 const HistoryInfoMap& history_info_map)

1325 : history_info_map_(history_info_map) {	1328 : history_info_map_(history_info_map) {

1326 }	1329 }

1327	1330

1328 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}	1331 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {

	1332 }

1329	1333

1330 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(	1334 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(

1331 const HistoryID h1,	1335 const HistoryID h1,

1332 const HistoryID h2) {	1336 const HistoryID h2) {

1333 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));	1337 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));

1334 if (entry1 == history_info_map_.end())	1338 if (entry1 == history_info_map_.end())

1335 return false;	1339 return false;

1336 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));	1340 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));

1337 if (entry2 == history_info_map_.end())	1341 if (entry2 == history_info_map_.end())

1338 return true;	1342 return true;

1339 const history::URLRow& r1(entry1->second.url_row);	1343 const history::URLRow& r1(entry1->second.url_row);

1340 const history::URLRow& r2(entry2->second.url_row);	1344 const history::URLRow& r2(entry2->second.url_row);

1341 // First cut: typed count, visit count, recency.	1345 // First cut: typed count, visit count, recency.

1342 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks	1346 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks

1343 // recently visited (within the last 12/24 hours) as highly important. Get	1347 // recently visited (within the last 12/24 hours) as highly important. Get

1344 // input from mpearson.	1348 // input from mpearson.

1345 if (r1.typed_count() != r2.typed_count())	1349 if (r1.typed_count() != r2.typed_count())

1346 return (r1.typed_count() > r2.typed_count());	1350 return (r1.typed_count() > r2.typed_count());

1347 if (r1.visit_count() != r2.visit_count())	1351 if (r1.visit_count() != r2.visit_count())

1348 return (r1.visit_count() > r2.visit_count());	1352 return (r1.visit_count() > r2.visit_count());

1349 return (r1.last_visit() > r2.last_visit());	1353 return (r1.last_visit() > r2.last_visit());

1350 }	1354 }

OLD	NEW