components/omnibox/browser/url_index_private_data.cc - Issue 2702413007: Cleaning up test only variables.

Side by Side Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2702413007: Cleaning up test only variables. (Closed)

Patch Set: Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/omnibox/browser/url_index_private_data.h"	5 #include "components/omnibox/browser/url_index_private_data.h"

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include <functional>	9 #include <functional>

10 #include <iterator>	10 #include <iterator>

(...skipping 123 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
134 }	134 }

135	135

136	136

137 // URLIndexPrivateData ---------------------------------------------------------	137 // URLIndexPrivateData ---------------------------------------------------------

138	138

139 // static	139 // static

140 constexpr size_t URLIndexPrivateData::kMaxVisitsToStoreInCache;	140 constexpr size_t URLIndexPrivateData::kMaxVisitsToStoreInCache;

141	141

142 URLIndexPrivateData::URLIndexPrivateData()	142 URLIndexPrivateData::URLIndexPrivateData()

143 : restored_cache_version_(0),	143 : restored_cache_version_(0),

144 saved_cache_version_(kCurrentCacheFileVersion),	144 saved_cache_version_(kCurrentCacheFileVersion) {}

145 pre_filter_item_count_(0),

146 post_filter_item_count_(0),

147 post_scoring_item_count_(0) {

148 }

149	145

150 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(	146 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

151 base::string16 original_search_string,	147 base::string16 original_search_string,

152 size_t cursor_position,	148 size_t cursor_position,

153 size_t max_matches,	149 size_t max_matches,

154 bookmarks::BookmarkModel* bookmark_model,	150 bookmarks::BookmarkModel* bookmark_model,

155 TemplateURLService* template_url_service) {	151 TemplateURLService* template_url_service) {

156 pre_filter_item_count_ = 0;

157 post_filter_item_count_ = 0;

158 post_scoring_item_count_ = 0;

159

160 // This list will contain the original search string and any other string	152 // This list will contain the original search string and any other string

161 // transformations.	153 // transformations.

162 String16Vector search_strings;	154 String16Vector search_strings;

163 search_strings.push_back(original_search_string);	155 search_strings.push_back(original_search_string);

164 if ((cursor_position != base::string16::npos) &&	156 if ((cursor_position != base::string16::npos) &&

165 (cursor_position < original_search_string.length()) &&	157 (cursor_position < original_search_string.length()) &&

166 (cursor_position > 0)) {	158 (cursor_position > 0)) {

167 // The original search_string broken at cursor position. This is one type of	159 // The original search_string broken at cursor position. This is one type of

168 // transformation.	160 // transformation.

169 base::string16 transformed_search_string(original_search_string);	161 base::string16 transformed_search_string(original_search_string);

170 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" "));	162 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" "));

171 search_strings.push_back(transformed_search_string);	163 search_strings.push_back(transformed_search_string);

172 }	164 }

173	165

174 ScoredHistoryMatches scored_items;	166 ScoredHistoryMatches scored_items;

175 // Invalidate the term cache and return if we have indexed no words (probably	167 // Invalidate the term cache and return if we have indexed no words (probably

176 // because we've not been initialized yet).	168 // because we've not been initialized yet).

177 if (word_list_.empty()) {	169 if (word_list_.empty()) {

178 search_term_cache_.clear();	170 search_term_cache_.clear();

179 return scored_items;	171 return scored_items;

180 }	172 }

181 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep	173 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

182 // approach.	174 // approach.

183 ResetSearchTermCache();	175 ResetSearchTermCache();

184	176

	177 bool history_ids_were_trimmed = false;

185 for (const base::string16& search_string : search_strings) {	178 for (const base::string16& search_string : search_strings) {

186 // The search string we receive may contain escaped characters. For reducing	179 // The search string we receive may contain escaped characters. For reducing

187 // the index we need individual, lower-cased words, ignoring escapings. For	180 // the index we need individual, lower-cased words, ignoring escapings. For

188 // the final filtering we need whitespace separated substrings possibly	181 // the final filtering we need whitespace separated substrings possibly

189 // containing escaped characters.	182 // containing escaped characters.

190 base::string16 lower_raw_string(base::i18n::ToLower(search_string));	183 base::string16 lower_raw_string(base::i18n::ToLower(search_string));

191 base::string16 lower_unescaped_string = net::UnescapeURLComponent(	184 base::string16 lower_unescaped_string = net::UnescapeURLComponent(

192 lower_raw_string,	185 lower_raw_string,

193 net::UnescapeRule::SPACES \| net::UnescapeRule::PATH_SEPARATORS \|	186 net::UnescapeRule::SPACES \| net::UnescapeRule::PATH_SEPARATORS \|

194 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);	187 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);

195	188

196 // Extract individual 'words' (as opposed to 'terms'; see comment in	189 // Extract individual 'words' (as opposed to 'terms'; see comment in

197 // HistoryIdSetToScoredMatches()) from the search string. When the user	190 // HistoryIdSetToScoredMatches()) from the search string. When the user

198 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id",	191 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id",

199 // "mstone" and "release".	192 // "mstone" and "release".

200 String16Vector lower_words(	193 String16Vector lower_words(

201 String16VectorFromString16(lower_unescaped_string, false, nullptr));	194 String16VectorFromString16(lower_unescaped_string, false, nullptr));

202 if (lower_words.empty())	195 if (lower_words.empty())

203 continue;	196 continue;

204 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);	197 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);

205 pre_filter_item_count_ += history_id_set.size();	198 history_ids_were_trimmed =
	Peter Kasting 2017/02/23 01:04:45 Nit: Use \|= Nit: Use \|= dyaroshev 2017/02/23 01:41:26 This would be a bit operation - is it ok? I think Show quoted text On 2017/02/23 01:04:45, Peter Kasting (backlogged) wrote: > Nit: Use \|= This would be a bit operation - is it ok? I think that you are correct and compiler won't do lazy evaluation for bitwise or but to doublecheck, are you sure? Peter Kasting 2017/02/23 01:46:06 As you note, A \|= B is equivalent to A = A \| B. B Show quoted text On 2017/02/23 01:41:26, dyaroshev wrote: > On 2017/02/23 01:04:45, Peter Kasting (backlogged) wrote: > > Nit: Use \|= > > This would be a bit operation - is it ok? > I think that you are correct and compiler won't do lazy evaluation for bitwise > or but to doublecheck, are you sure? As you note, A \|= B is equivalent to A = A \| B. Bitwise-or never short-circuits, no matter the types of its arguments, so this is safe. dyaroshev 2017/02/24 01:27:03 Done. Show quoted text On 2017/02/23 01:46:06, Peter Kasting wrote: > On 2017/02/23 01:41:26, dyaroshev wrote: > > On 2017/02/23 01:04:45, Peter Kasting (backlogged) wrote: > > > Nit: Use \|= > > > > This would be a bit operation - is it ok? > > I think that you are correct and compiler won't do lazy evaluation for bitwise > > or but to doublecheck, are you sure? > > As you note, A \|= B is equivalent to A = A \| B. Bitwise-or never > short-circuits, no matter the types of its arguments, so this is safe. Done.
206 // Trim the candidate pool if it is large. Note that we do not filter out	199 TrimHistoryIdsPool(&history_id_set) \|\| history_ids_were_trimmed;

207 // items that do not contain the search terms as proper substrings --	200

208 // doing so is the performance-costly operation we are trying to avoid in

209 // order to maintain omnibox responsiveness.

210 const size_t kItemsToScoreLimit = 500;

211 if (history_id_set.size() > kItemsToScoreLimit) {

212 HistoryIDVector history_ids;

213 std::copy(history_id_set.begin(), history_id_set.end(),

214 std::back_inserter(history_ids));

215 // Trim down the set by sorting by typed-count, visit-count, and last

216 // visit.

217 HistoryItemFactorGreater item_factor_functor(history_info_map_);

218 std::partial_sort(history_ids.begin(),

219 history_ids.begin() + kItemsToScoreLimit,

220 history_ids.end(), item_factor_functor);

221 history_id_set.clear();

222 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,

223 std::inserter(history_id_set, history_id_set.end()));

224 post_filter_item_count_ += history_id_set.size();

225 } else {

226 post_filter_item_count_ += pre_filter_item_count_;

227 }

228 ScoredHistoryMatches temp_scored_items;	201 ScoredHistoryMatches temp_scored_items;

229 HistoryIdSetToScoredMatches(history_id_set, lower_raw_string,	202 HistoryIdSetToScoredMatches(history_id_set, lower_raw_string,

230 template_url_service, bookmark_model,	203 template_url_service, bookmark_model,

231 &temp_scored_items);	204 &temp_scored_items);

232 scored_items.insert(scored_items.end(), temp_scored_items.begin(),	205 scored_items.insert(scored_items.end(), temp_scored_items.begin(),

233 temp_scored_items.end());	206 temp_scored_items.end());

234 }	207 }

235 // Select and sort only the top \|max_matches\| results.	208 // Select and sort only the top \|max_matches\| results.

236 if (scored_items.size() > max_matches) {	209 if (scored_items.size() > max_matches) {

237 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches,	210 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches,

238 scored_items.end(),	211 scored_items.end(),

239 ScoredHistoryMatch::MatchScoreGreater);	212 ScoredHistoryMatch::MatchScoreGreater);

240 scored_items.resize(max_matches);	213 scored_items.resize(max_matches);

241 } else {	214 } else {

242 std::sort(scored_items.begin(), scored_items.end(),	215 std::sort(scored_items.begin(), scored_items.end(),

243 ScoredHistoryMatch::MatchScoreGreater);	216 ScoredHistoryMatch::MatchScoreGreater);

244 }	217 }

245 post_scoring_item_count_ = scored_items.size();	218 if (history_ids_were_trimmed) {

246 if (pre_filter_item_count_ > post_filter_item_count_) {

247 // If we trim the results set we do not want to cache the results for next	219 // If we trim the results set we do not want to cache the results for next

248 // time as the user's ultimately desired result could easily be eliminated	220 // time as the user's ultimately desired result could easily be eliminated

249 // in the early rough filter.	221 // in the early rough filter.

250 search_term_cache_.clear();	222 search_term_cache_.clear();

251 } else {	223 } else {

252 // Remove any stale SearchTermCacheItems.	224 // Remove any stale SearchTermCacheItems.

253 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();	225 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();

254 cache_iter != search_term_cache_.end(); ) {	226 cache_iter != search_term_cache_.end(); ) {

255 if (!cache_iter->second.used_)	227 if (!cache_iter->second.used_)

256 cache_iter = search_term_cache_.erase(cache_iter);	228 cache_iter = search_term_cache_.erase(cache_iter);

(...skipping 208 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
465 data_copy->available_words_ = available_words_;	437 data_copy->available_words_ = available_words_;

466 data_copy->word_map_ = word_map_;	438 data_copy->word_map_ = word_map_;

467 data_copy->char_word_map_ = char_word_map_;	439 data_copy->char_word_map_ = char_word_map_;

468 data_copy->word_id_history_map_ = word_id_history_map_;	440 data_copy->word_id_history_map_ = word_id_history_map_;

469 data_copy->history_id_word_map_ = history_id_word_map_;	441 data_copy->history_id_word_map_ = history_id_word_map_;

470 data_copy->history_info_map_ = history_info_map_;	442 data_copy->history_info_map_ = history_info_map_;

471 data_copy->word_starts_map_ = word_starts_map_;	443 data_copy->word_starts_map_ = word_starts_map_;

472 return data_copy;	444 return data_copy;

473 // Not copied:	445 // Not copied:

474 // search_term_cache_	446 // search_term_cache_

475 // pre_filter_item_count_

476 // post_filter_item_count_

477 // post_scoring_item_count_

478 }	447 }

479	448

480 bool URLIndexPrivateData::Empty() const {	449 bool URLIndexPrivateData::Empty() const {

481 return history_info_map_.empty();	450 return history_info_map_.empty();

482 }	451 }

483	452

484 void URLIndexPrivateData::Clear() {	453 void URLIndexPrivateData::Clear() {

485 last_time_rebuilt_from_history_ = base::Time();	454 last_time_rebuilt_from_history_ = base::Time();

486 word_list_.clear();	455 word_list_.clear();

487 available_words_.clear();	456 available_words_.clear();

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
521 history_id_set.swap(term_history_set);	490 history_id_set.swap(term_history_set);

522 } else {	491 } else {

523 HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>(	492 HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>(

524 history_id_set, term_history_set);	493 history_id_set, term_history_set);

525 history_id_set.swap(new_history_id_set);	494 history_id_set.swap(new_history_id_set);

526 }	495 }

527 }	496 }

528 return history_id_set;	497 return history_id_set;

529 }	498 }

530	499

	500 bool URLIndexPrivateData::TrimHistoryIdsPool(

	501 HistoryIDSet* history_id_set) const {

	502 constexpr size_t kItemsToScoreLimit = 500;

	503 if (history_id_set->size() < kItemsToScoreLimit)

	504 return false;

	505

	506 HistoryIDVector history_ids(history_id_set->begin(), history_id_set->end());

	507

	508 // Trim down the set by sorting by typed-count, visit-count, and last

	509 // visit.

	510 auto new_end = history_ids.begin() + kItemsToScoreLimit;

	511 HistoryItemFactorGreater item_factor_functor(history_info_map_);

	512

	513 std::nth_element(history_ids.begin(), new_end, history_ids.end(),

	514 item_factor_functor);

	515 history_ids.erase(new_end, history_ids.end());

	516

	517 *history_id_set = {history_ids.begin(), history_ids.end()};

	518 return true;

	519 }

	520

531 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(	521 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(

532 const base::string16& term) {	522 const base::string16& term) {

533 if (term.empty())	523 if (term.empty())

534 return HistoryIDSet();	524 return HistoryIDSet();

535	525

536 // TODO(mrossetti): Consider optimizing for very common terms such as	526 // TODO(mrossetti): Consider optimizing for very common terms such as

537 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently	527 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently

538 // occuring words in the user's searches.	528 // occuring words in the user's searches.

539	529

540 size_t term_length = term.length();	530 size_t term_length = term.length();

(...skipping 840 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1381 // First cut: typed count, visit count, recency.	1371 // First cut: typed count, visit count, recency.

1382 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks	1372 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks

1383 // recently visited (within the last 12/24 hours) as highly important. Get	1373 // recently visited (within the last 12/24 hours) as highly important. Get

1384 // input from mpearson.	1374 // input from mpearson.

1385 if (r1.typed_count() != r2.typed_count())	1375 if (r1.typed_count() != r2.typed_count())

1386 return (r1.typed_count() > r2.typed_count());	1376 return (r1.typed_count() > r2.typed_count());

1387 if (r1.visit_count() != r2.visit_count())	1377 if (r1.visit_count() != r2.visit_count())

1388 return (r1.visit_count() > r2.visit_count());	1378 return (r1.visit_count() > r2.visit_count());

1389 return (r1.last_visit() > r2.last_visit());	1379 return (r1.last_visit() > r2.last_visit());

1390 }	1380 }

OLD	NEW

« components/omnibox/browser/url_index_private_data.h ('K') | « components/omnibox/browser/url_index_private_data.h ('k') | no next file » | no next file with comments »