chrome/browser/history/url_index_private_data.cc - Issue 896983003: Componentize ScoredHistoryMatch

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 896983003: Componentize ScoredHistoryMatch (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Rename ScoredHistoryMatchBuilder to ScoredHistoryMatchBuilderImpl Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« chrome/browser/history/in_memory_url_index_unittest.cc ('K') | « chrome/browser/history/url_index_private_data.h ('k') | chrome/chrome_browser.gypi » ('j') | components/history/core/browser/scored_history_match.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/history/url_index_private_data.h"	5 #include "chrome/browser/history/url_index_private_data.h"

6	6

7 #include <functional>	7 #include <functional>

8 #include <iterator>	8 #include <iterator>

9 #include <limits>	9 #include <limits>

10 #include <numeric>	10 #include <numeric>

11 #include <string>	11 #include <string>

12 #include <vector>	12 #include <vector>

13	13

14 #include "base/basictypes.h"	14 #include "base/basictypes.h"

15 #include "base/files/file_util.h"	15 #include "base/files/file_util.h"

16 #include "base/i18n/break_iterator.h"	16 #include "base/i18n/break_iterator.h"

17 #include "base/i18n/case_conversion.h"	17 #include "base/i18n/case_conversion.h"

18 #include "base/metrics/histogram.h"	18 #include "base/metrics/histogram.h"

19 #include "base/strings/string_util.h"	19 #include "base/strings/string_util.h"

20 #include "base/strings/utf_string_conversions.h"	20 #include "base/strings/utf_string_conversions.h"

21 #include "base/time/time.h"	21 #include "base/time/time.h"

22 #include "chrome/browser/history/history_service.h"	22 #include "chrome/browser/history/history_service.h"

23 #include "chrome/browser/history/in_memory_url_index.h"	23 #include "chrome/browser/history/in_memory_url_index.h"

24 #include "components/bookmarks/browser/bookmark_utils.h"	24 #include "components/bookmarks/browser/bookmark_utils.h"

25 #include "components/history/core/browser/history_client.h"

26 #include "components/history/core/browser/history_database.h"	25 #include "components/history/core/browser/history_database.h"

27 #include "components/history/core/browser/history_db_task.h"	26 #include "components/history/core/browser/history_db_task.h"

28 #include "net/base/net_util.h"	27 #include "net/base/net_util.h"

29	28

30 #if defined(USE_SYSTEM_PROTOBUF)	29 #if defined(USE_SYSTEM_PROTOBUF)

31 #include <google/protobuf/repeated_field.h>	30 #include <google/protobuf/repeated_field.h>

32 #else	31 #else

33 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"	32 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"

34 #endif	33 #endif

35	34

(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
144 pre_filter_item_count_(0),	143 pre_filter_item_count_(0),

145 post_filter_item_count_(0),	144 post_filter_item_count_(0),

146 post_scoring_item_count_(0) {	145 post_scoring_item_count_(0) {

147 }	146 }

148	147

149 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(	148 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

150 base::string16 search_string,	149 base::string16 search_string,

151 size_t cursor_position,	150 size_t cursor_position,

152 size_t max_matches,	151 size_t max_matches,

153 const std::string& languages,	152 const std::string& languages,

154 HistoryClient* history_client) {	153 const history::ScoredHistoryMatch::Builder& builder) {

155 // If cursor position is set and useful (not at either end of the	154 // If cursor position is set and useful (not at either end of the

156 // string), allow the search string to be broken at cursor position.	155 // string), allow the search string to be broken at cursor position.

157 // We do this by pretending there's a space where the cursor is.	156 // We do this by pretending there's a space where the cursor is.

158 if ((cursor_position != base::string16::npos) &&	157 if ((cursor_position != base::string16::npos) &&

159 (cursor_position < search_string.length()) &&	158 (cursor_position < search_string.length()) &&

160 (cursor_position > 0)) {	159 (cursor_position > 0)) {

161 search_string.insert(cursor_position, base::ASCIIToUTF16(" "));	160 search_string.insert(cursor_position, base::ASCIIToUTF16(" "));

162 }	161 }

163 pre_filter_item_count_ = 0;	162 pre_filter_item_count_ = 0;

164 post_filter_item_count_ = 0;	163 post_filter_item_count_ = 0;

(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
235 &lower_raw_terms) == 0) {	234 &lower_raw_terms) == 0) {

236 // Don't score matches when there are no terms to score against. (It's	235 // Don't score matches when there are no terms to score against. (It's

237 // possible that the word break iterator that extracts words to search	236 // possible that the word break iterator that extracts words to search

238 // for in the database allows some whitespace "words" whereas Tokenize	237 // for in the database allows some whitespace "words" whereas Tokenize

239 // excludes a long list of whitespace.) One could write a scoring	238 // excludes a long list of whitespace.) One could write a scoring

240 // function that gives a reasonable order to matches when there	239 // function that gives a reasonable order to matches when there

241 // are no terms (i.e., all the words are some form of whitespace),	240 // are no terms (i.e., all the words are some form of whitespace),

242 // but this is such a rare edge case that it's not worth the time.	241 // but this is such a rare edge case that it's not worth the time.

243 return scored_items;	242 return scored_items;

244 }	243 }

245 scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),	244 scored_items =

246 AddHistoryMatch(*this, languages, history_client, lower_raw_string,	245 std::for_each(

247 lower_raw_terms, base::Time::Now())).ScoredMatches();	246 history_id_set.begin(), history_id_set.end(),

	247 AddHistoryMatch(*this, languages, lower_raw_string, lower_raw_terms,

	248 base::Time::Now(), builder)).ScoredMatches();

248	249

249 // Select and sort only the top |max_matches| results.	250 // Select and sort only the top |max_matches| results.

250 if (scored_items.size() > max_matches) {	251 if (scored_items.size() > max_matches) {

251 std::partial_sort(scored_items.begin(),	252 std::partial_sort(scored_items.begin(),

252 scored_items.begin() +	253 scored_items.begin() +

253 max_matches,	254 max_matches,

254 scored_items.end(),	255 scored_items.end(),

255 ScoredHistoryMatch::MatchScoreGreater);	256 ScoredHistoryMatch::MatchScoreGreater);

256 scored_items.resize(max_matches);	257 scored_items.resize(max_matches);

257 } else {	258 } else {

(...skipping 1001 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1259 : used_(true) {}	1260 : used_(true) {}

1260	1261

1261 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {}	1262 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {}

1262	1263

1263	1264

1264 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------	1265 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------

1265	1266

1266 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(	1267 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(

1267 const URLIndexPrivateData& private_data,	1268 const URLIndexPrivateData& private_data,

1268 const std::string& languages,	1269 const std::string& languages,

1269 HistoryClient* history_client,

1270 const base::string16& lower_string,	1270 const base::string16& lower_string,

1271 const String16Vector& lower_terms,	1271 const String16Vector& lower_terms,

1272 const base::Time now)	1272 const base::Time now,

	1273 const ScoredHistoryMatch::Builder& builder)

1273 : private_data_(private_data),	1274 : private_data_(private_data),

1274 languages_(languages),	1275 languages_(languages),

1275 history_client_(history_client),	1276 builder_(builder),

1276 lower_string_(lower_string),	1277 lower_string_(lower_string),

1277 lower_terms_(lower_terms),	1278 lower_terms_(lower_terms),

1278 now_(now) {	1279 now_(now) {

1279 // Calculate offsets for each term. For instance, the offset for	1280 // Calculate offsets for each term. For instance, the offset for

1280 // ".net" should be 1, indicating that the actual word-part of the term	1281 // ".net" should be 1, indicating that the actual word-part of the term

1281 // starts at offset 1.	1282 // starts at offset 1.

1282 lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u);	1283 lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u);

1283 for (size_t i = 0; i < lower_terms_.size(); ++i) {	1284 for (size_t i = 0; i < lower_terms_.size(); ++i) {

1284 base::i18n::BreakIterator iter(lower_terms_[i],	1285 base::i18n::BreakIterator iter(lower_terms_[i],

1285 base::i18n::BreakIterator::BREAK_WORD);	1286 base::i18n::BreakIterator::BREAK_WORD);

(...skipping 13 matching lines...) Expand all Loading...
1299 void URLIndexPrivateData::AddHistoryMatch::operator()(	1300 void URLIndexPrivateData::AddHistoryMatch::operator()(

1300 const HistoryID history_id) {	1301 const HistoryID history_id) {

1301 HistoryInfoMap::const_iterator hist_pos =	1302 HistoryInfoMap::const_iterator hist_pos =

1302 private_data_.history_info_map_.find(history_id);	1303 private_data_.history_info_map_.find(history_id);

1303 if (hist_pos != private_data_.history_info_map_.end()) {	1304 if (hist_pos != private_data_.history_info_map_.end()) {

1304 const URLRow& hist_item = hist_pos->second.url_row;	1305 const URLRow& hist_item = hist_pos->second.url_row;

1305 const VisitInfoVector& visits = hist_pos->second.visits;	1306 const VisitInfoVector& visits = hist_pos->second.visits;

1306 WordStartsMap::const_iterator starts_pos =	1307 WordStartsMap::const_iterator starts_pos =

1307 private_data_.word_starts_map_.find(history_id);	1308 private_data_.word_starts_map_.find(history_id);

1308 DCHECK(starts_pos != private_data_.word_starts_map_.end());	1309 DCHECK(starts_pos != private_data_.word_starts_map_.end());

1309 ScoredHistoryMatch match(hist_item, visits, languages_, lower_string_,	1310 ScoredHistoryMatch match = builder_.Build(

1310 lower_terms_, lower_terms_to_word_starts_offsets_,	1311 hist_item, visits, languages_, lower_string_, lower_terms_,

1311 starts_pos->second, now_, history_client_);	1312 lower_terms_to_word_starts_offsets_, starts_pos->second, now_);

1312 if (match.raw_score() > 0)	1313 if (match.raw_score > 0)

1313 scored_matches_.push_back(match);	1314 scored_matches_.push_back(match);

1314 }	1315 }

1315 }	1316 }

1316	1317

1317	1318

1318 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------	1319 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------

1319	1320

1320 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(	1321 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(

1321 const HistoryInfoMap& history_info_map)	1322 const HistoryInfoMap& history_info_map)

1322 : history_info_map_(history_info_map) {	1323 : history_info_map_(history_info_map) {

(...skipping 17 matching lines...) Expand all Loading...
1340 // recently visited (within the last 12/24 hours) as highly important. Get	1341 // recently visited (within the last 12/24 hours) as highly important. Get

1341 // input from mpearson.	1342 // input from mpearson.

1342 if (r1.typed_count() != r2.typed_count())	1343 if (r1.typed_count() != r2.typed_count())

1343 return (r1.typed_count() > r2.typed_count());	1344 return (r1.typed_count() > r2.typed_count());

1344 if (r1.visit_count() != r2.visit_count())	1345 if (r1.visit_count() != r2.visit_count())

1345 return (r1.visit_count() > r2.visit_count());	1346 return (r1.visit_count() > r2.visit_count());

1346 return (r1.last_visit() > r2.last_visit());	1347 return (r1.last_visit() > r2.last_visit());

1347 }	1348 }

1348	1349

1349 } // namespace history	1350 } // namespace history

OLD	NEW