chrome/browser/history/url_index_private_data.cc - Issue 903493002: Componentize ScoredHistoryMatch

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 903493002: Componentize ScoredHistoryMatch (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Rebase Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/history/url_index_private_data.h"	5 #include "chrome/browser/history/url_index_private_data.h"

6	6

7 #include <functional>	7 #include <functional>

8 #include <iterator>	8 #include <iterator>

9 #include <limits>	9 #include <limits>

10 #include <numeric>	10 #include <numeric>

(...skipping 133 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
144 pre_filter_item_count_(0),	144 pre_filter_item_count_(0),

145 post_filter_item_count_(0),	145 post_filter_item_count_(0),

146 post_scoring_item_count_(0) {	146 post_scoring_item_count_(0) {

147 }	147 }

148	148

149 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(	149 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

150 base::string16 search_string,	150 base::string16 search_string,

151 size_t cursor_position,	151 size_t cursor_position,

152 size_t max_matches,	152 size_t max_matches,

153 const std::string& languages,	153 const std::string& languages,

154 HistoryClient* history_client) {	154 const ScoredHistoryMatchClient* scored_history_match_client) {

155 // If cursor position is set and useful (not at either end of the	155 // If cursor position is set and useful (not at either end of the

156 // string), allow the search string to be broken at cursor position.	156 // string), allow the search string to be broken at cursor position.

157 // We do this by pretending there's a space where the cursor is.	157 // We do this by pretending there's a space where the cursor is.

158 if ((cursor_position != base::string16::npos) &&	158 if ((cursor_position != base::string16::npos) &&

159 (cursor_position < search_string.length()) &&	159 (cursor_position < search_string.length()) &&

160 (cursor_position > 0)) {	160 (cursor_position > 0)) {

161 search_string.insert(cursor_position, base::ASCIIToUTF16(" "));	161 search_string.insert(cursor_position, base::ASCIIToUTF16(" "));

162 }	162 }

163 pre_filter_item_count_ = 0;	163 pre_filter_item_count_ = 0;

164 post_filter_item_count_ = 0;	164 post_filter_item_count_ = 0;

(...skipping 70 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
235 &lower_raw_terms) == 0) {	235 &lower_raw_terms) == 0) {

236 // Don't score matches when there are no terms to score against. (It's	236 // Don't score matches when there are no terms to score against. (It's

237 // possible that the word break iterator that extracts words to search	237 // possible that the word break iterator that extracts words to search

238 // for in the database allows some whitespace "words" whereas Tokenize	238 // for in the database allows some whitespace "words" whereas Tokenize

239 // excludes a long list of whitespace.) One could write a scoring	239 // excludes a long list of whitespace.) One could write a scoring

240 // function that gives a reasonable order to matches when there	240 // function that gives a reasonable order to matches when there

241 // are no terms (i.e., all the words are some form of whitespace),	241 // are no terms (i.e., all the words are some form of whitespace),

242 // but this is such a rare edge case that it's not worth the time.	242 // but this is such a rare edge case that it's not worth the time.

243 return scored_items;	243 return scored_items;

244 }	244 }

245 scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),	245 scored_items =

246 AddHistoryMatch(*this, languages, history_client, lower_raw_string,	246 std::for_each(

247 lower_raw_terms, base::Time::Now())).ScoredMatches();	247 history_id_set.begin(), history_id_set.end(),

	248 AddHistoryMatch(*this, languages, scored_history_match_client,

	249 lower_raw_string, lower_raw_terms, base::Time::Now()))

	250 .ScoredMatches();

248	251

249 // Select and sort only the top \|max_matches\| results.	252 // Select and sort only the top \|max_matches\| results.

250 if (scored_items.size() > max_matches) {	253 if (scored_items.size() > max_matches) {

251 std::partial_sort(scored_items.begin(),	254 std::partial_sort(scored_items.begin(),

252 scored_items.begin() +	255 scored_items.begin() +

253 max_matches,	256 max_matches,

254 scored_items.end(),	257 scored_items.end(),

255 ScoredHistoryMatch::MatchScoreGreater);	258 ScoredHistoryMatch::MatchScoreGreater);

256 scored_items.resize(max_matches);	259 scored_items.resize(max_matches);

257 } else {	260 } else {

(...skipping 1001 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1259 : used_(true) {}	1262 : used_(true) {}

1260	1263

1261 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {}	1264 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {}

1262	1265

1263	1266

1264 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------	1267 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------

1265	1268

1266 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(	1269 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(

1267 const URLIndexPrivateData& private_data,	1270 const URLIndexPrivateData& private_data,

1268 const std::string& languages,	1271 const std::string& languages,

1269 HistoryClient* history_client,	1272 const ScoredHistoryMatchClient* scored_history_match_client,

1270 const base::string16& lower_string,	1273 const base::string16& lower_string,

1271 const String16Vector& lower_terms,	1274 const String16Vector& lower_terms,

1272 const base::Time now)	1275 const base::Time now)

1273 : private_data_(private_data),	1276 : private_data_(private_data),

1274 languages_(languages),	1277 languages_(languages),

1275 history_client_(history_client),	1278 scored_history_match_client_(scored_history_match_client),

1276 lower_string_(lower_string),	1279 lower_string_(lower_string),

1277 lower_terms_(lower_terms),	1280 lower_terms_(lower_terms),

1278 now_(now) {	1281 now_(now) {

1279 // Calculate offsets for each term. For instance, the offset for	1282 // Calculate offsets for each term. For instance, the offset for

1280 // ".net" should be 1, indicating that the actual word-part of the term	1283 // ".net" should be 1, indicating that the actual word-part of the term

1281 // starts at offset 1.	1284 // starts at offset 1.

1282 lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u);	1285 lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u);

1283 for (size_t i = 0; i < lower_terms_.size(); ++i) {	1286 for (size_t i = 0; i < lower_terms_.size(); ++i) {

1284 base::i18n::BreakIterator iter(lower_terms_[i],	1287 base::i18n::BreakIterator iter(lower_terms_[i],

1285 base::i18n::BreakIterator::BREAK_WORD);	1288 base::i18n::BreakIterator::BREAK_WORD);

(...skipping 15 matching lines...) Expand all Loading...
1301 HistoryInfoMap::const_iterator hist_pos =	1304 HistoryInfoMap::const_iterator hist_pos =

1302 private_data_.history_info_map_.find(history_id);	1305 private_data_.history_info_map_.find(history_id);

1303 if (hist_pos != private_data_.history_info_map_.end()) {	1306 if (hist_pos != private_data_.history_info_map_.end()) {

1304 const URLRow& hist_item = hist_pos->second.url_row;	1307 const URLRow& hist_item = hist_pos->second.url_row;

1305 const VisitInfoVector& visits = hist_pos->second.visits;	1308 const VisitInfoVector& visits = hist_pos->second.visits;

1306 WordStartsMap::const_iterator starts_pos =	1309 WordStartsMap::const_iterator starts_pos =

1307 private_data_.word_starts_map_.find(history_id);	1310 private_data_.word_starts_map_.find(history_id);

1308 DCHECK(starts_pos != private_data_.word_starts_map_.end());	1311 DCHECK(starts_pos != private_data_.word_starts_map_.end());

1309 ScoredHistoryMatch match(hist_item, visits, languages_, lower_string_,	1312 ScoredHistoryMatch match(hist_item, visits, languages_, lower_string_,

1310 lower_terms_, lower_terms_to_word_starts_offsets_,	1313 lower_terms_, lower_terms_to_word_starts_offsets_,

1311 starts_pos->second, now_, history_client_);	1314 starts_pos->second, now_,

	1315 scored_history_match_client_);

1312 if (match.raw_score() > 0)	1316 if (match.raw_score() > 0)

1313 scored_matches_.push_back(match);	1317 scored_matches_.push_back(match);

1314 }	1318 }

1315 }	1319 }

1316	1320

1317	1321

1318 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------	1322 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------

1319	1323

1320 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(	1324 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(

1321 const HistoryInfoMap& history_info_map)	1325 const HistoryInfoMap& history_info_map)

(...skipping 18 matching lines...) Expand all Loading...
1340 // recently visited (within the last 12/24 hours) as highly important. Get	1344 // recently visited (within the last 12/24 hours) as highly important. Get

1341 // input from mpearson.	1345 // input from mpearson.

1342 if (r1.typed_count() != r2.typed_count())	1346 if (r1.typed_count() != r2.typed_count())

1343 return (r1.typed_count() > r2.typed_count());	1347 return (r1.typed_count() > r2.typed_count());

1344 if (r1.visit_count() != r2.visit_count())	1348 if (r1.visit_count() != r2.visit_count())

1345 return (r1.visit_count() > r2.visit_count());	1349 return (r1.visit_count() > r2.visit_count());

1346 return (r1.last_visit() > r2.last_visit());	1350 return (r1.last_visit() > r2.last_visit());

1347 }	1351 }

1348	1352

1349 } // namespace history	1353 } // namespace history

OLD	NEW

« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | chrome/chrome_browser.gypi » ('j') | no next file with comments »