chrome/browser/autocomplete/url_index_private_data.cc - Issue 959343004: Move InMemoryURLIndex outside of history namespace

Side by Side Diff: chrome/browser/autocomplete/url_index_private_data.cc

Issue 959343004: Move InMemoryURLIndex outside of history namespace (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@in-memory-url-index

Patch Set: Rebase Created 5 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/autocomplete/url_index_private_data.h"	5 #include "chrome/browser/autocomplete/url_index_private_data.h"

6	6

7 #include <functional>	7 #include <functional>

8 #include <iterator>	8 #include <iterator>

9 #include <limits>	9 #include <limits>

10 #include <numeric>	10 #include <numeric>

(...skipping 22 matching lines...) Expand all Loading...
33 #endif	33 #endif

34	34

35 using google::protobuf::RepeatedField;	35 using google::protobuf::RepeatedField;

36 using google::protobuf::RepeatedPtrField;	36 using google::protobuf::RepeatedPtrField;

37 using in_memory_url_index::InMemoryURLIndexCacheItem;	37 using in_memory_url_index::InMemoryURLIndexCacheItem;

38	38

39 namespace {	39 namespace {

40 static const size_t kMaxVisitsToStoreInCache = 10u;	40 static const size_t kMaxVisitsToStoreInCache = 10u;

41 } // anonymous namespace	41 } // anonymous namespace

42	42

43 namespace history {	43 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem

44	44 WordListItem;

45 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;	45 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry

46 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;	46 WordMapEntry;

47 typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;	47 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;

48 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem;	48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem

49 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry	49 CharWordMapItem;

50 CharWordMapEntry;	50 typedef in_memory_url_index::

51 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem	51 InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry CharWordMapEntry;

	52 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordIDHistoryMapItem

52 WordIDHistoryMapItem;	53 WordIDHistoryMapItem;

53 typedef imui::	54 typedef in_memory_url_index::

54 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry	55 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry

55 WordIDHistoryMapEntry;	56 WordIDHistoryMapEntry;

56 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;	57 typedef in_memory_url_index::InMemoryURLIndexCacheItem_HistoryInfoMapItem

57 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry	58 HistoryInfoMapItem;

58 HistoryInfoMapEntry;	59 typedef in_memory_url_index::

59 typedef imui::	60 InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry

	61 HistoryInfoMapEntry;

	62 typedef in_memory_url_index::

60 InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry_VisitInfo	63 InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry_VisitInfo

61 HistoryInfoMapEntry_VisitInfo;	64 HistoryInfoMapEntry_VisitInfo;

62 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem;	65 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordStartsMapItem

63 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry	66 WordStartsMapItem;

64 WordStartsMapEntry;	67 typedef in_memory_url_index::

65	68 InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry

	69 WordStartsMapEntry;

66	70

67 // Algorithm Functions ---------------------------------------------------------	71 // Algorithm Functions ---------------------------------------------------------

68	72

69 // Comparison function for sorting search terms by descending length.	73 // Comparison function for sorting search terms by descending length.

70 bool LengthGreater(const base::string16& string_a,	74 bool LengthGreater(const base::string16& string_a,

71 const base::string16& string_b) {	75 const base::string16& string_b) {

72 return string_a.length() > string_b.length();	76 return string_a.length() > string_b.length();

73 }	77 }

74	78

75	79

76 // UpdateRecentVisitsFromHistoryDBTask -----------------------------------------	80 // UpdateRecentVisitsFromHistoryDBTask -----------------------------------------

77	81

78 // HistoryDBTask used to update the recent visit data for a particular	82 // HistoryDBTask used to update the recent visit data for a particular

79 // row from the history database.	83 // row from the history database.

80 class UpdateRecentVisitsFromHistoryDBTask : public HistoryDBTask {	84 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask {

81 public:	85 public:

82 explicit UpdateRecentVisitsFromHistoryDBTask(	86 explicit UpdateRecentVisitsFromHistoryDBTask(

83 URLIndexPrivateData* private_data,	87 URLIndexPrivateData* private_data,

84 URLID url_id);	88 history::URLID url_id);

85	89

86 bool RunOnDBThread(HistoryBackend* backend,	90 bool RunOnDBThread(history::HistoryBackend* backend,

87 history::HistoryDatabase* db) override;	91 history::HistoryDatabase* db) override;

88 void DoneRunOnMainThread() override;	92 void DoneRunOnMainThread() override;

89	93

90 private:	94 private:

91 ~UpdateRecentVisitsFromHistoryDBTask() override;	95 ~UpdateRecentVisitsFromHistoryDBTask() override;

92	96

93 // The URLIndexPrivateData that gets updated after the historyDB	97 // The URLIndexPrivateData that gets updated after the historyDB

94 // task returns.	98 // task returns.

95 URLIndexPrivateData* private_data_;	99 URLIndexPrivateData* private_data_;

96 // The ID of the URL to get visits for and then update.	100 // The ID of the URL to get visits for and then update.

97 URLID url_id_;	101 history::URLID url_id_;

98 // Whether fetching the recent visits for the URL succeeded.	102 // Whether fetching the recent visits for the URL succeeded.

99 bool succeeded_;	103 bool succeeded_;

100 // The awaited data that's shown to private_data_ for it to copy and	104 // The awaited data that's shown to private_data_ for it to copy and

101 // store.	105 // store.

102 VisitVector recent_visits_;	106 history::VisitVector recent_visits_;

103	107

104 DISALLOW_COPY_AND_ASSIGN(UpdateRecentVisitsFromHistoryDBTask);	108 DISALLOW_COPY_AND_ASSIGN(UpdateRecentVisitsFromHistoryDBTask);

105 };	109 };

106	110

107 UpdateRecentVisitsFromHistoryDBTask::UpdateRecentVisitsFromHistoryDBTask(	111 UpdateRecentVisitsFromHistoryDBTask::UpdateRecentVisitsFromHistoryDBTask(

108 URLIndexPrivateData* private_data,	112 URLIndexPrivateData* private_data,

109 URLID url_id)	113 history::URLID url_id)

110 : private_data_(private_data),	114 : private_data_(private_data), url_id_(url_id), succeeded_(false) {

111 url_id_(url_id),

112 succeeded_(false) {

113 }	115 }

114	116

115 bool UpdateRecentVisitsFromHistoryDBTask::RunOnDBThread(	117 bool UpdateRecentVisitsFromHistoryDBTask::RunOnDBThread(

116 HistoryBackend* backend,	118 history::HistoryBackend* backend,

117 HistoryDatabase* db) {	119 history::HistoryDatabase* db) {

118 // Make sure the private data is going to get as many recent visits as	120 // Make sure the private data is going to get as many recent visits as

119 // ScoredHistoryMatch::GetFrequency() hopes to use.	121 // ScoredHistoryMatch::GetFrequency() hopes to use.

120 DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);	122 DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);

121 succeeded_ = db->GetMostRecentVisitsForURL(url_id_,	123 succeeded_ = db->GetMostRecentVisitsForURL(url_id_,

122 kMaxVisitsToStoreInCache,	124 kMaxVisitsToStoreInCache,

123 &recent_visits_);	125 &recent_visits_);

124 if (!succeeded_)	126 if (!succeeded_)

125 recent_visits_.clear();	127 recent_visits_.clear();

126 return true; // Always claim to be done; do not retry failures.	128 return true; // Always claim to be done; do not retry failures.

127 }	129 }

(...skipping 15 matching lines...) Expand all Loading...
143 pre_filter_item_count_(0),	145 pre_filter_item_count_(0),

144 post_filter_item_count_(0),	146 post_filter_item_count_(0),

145 post_scoring_item_count_(0) {	147 post_scoring_item_count_(0) {

146 }	148 }

147	149

148 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(	150 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

149 base::string16 search_string,	151 base::string16 search_string,

150 size_t cursor_position,	152 size_t cursor_position,

151 size_t max_matches,	153 size_t max_matches,

152 const std::string& languages,	154 const std::string& languages,

153 const history::ScoredHistoryMatch::Builder& builder) {	155 const ScoredHistoryMatch::Builder& builder) {

154 // If cursor position is set and useful (not at either end of the	156 // If cursor position is set and useful (not at either end of the

155 // string), allow the search string to be broken at cursor position.	157 // string), allow the search string to be broken at cursor position.

156 // We do this by pretending there's a space where the cursor is.	158 // We do this by pretending there's a space where the cursor is.

157 if ((cursor_position != base::string16::npos) &&	159 if ((cursor_position != base::string16::npos) &&

158 (cursor_position < search_string.length()) &&	160 (cursor_position < search_string.length()) &&

159 (cursor_position > 0)) {	161 (cursor_position > 0)) {

160 search_string.insert(cursor_position, base::ASCIIToUTF16(" "));	162 search_string.insert(cursor_position, base::ASCIIToUTF16(" "));

161 }	163 }

162 pre_filter_item_count_ = 0;	164 pre_filter_item_count_ = 0;

163 post_filter_item_count_ = 0;	165 post_filter_item_count_ = 0;

164 post_scoring_item_count_ = 0;	166 post_scoring_item_count_ = 0;

165 // The search string we receive may contain escaped characters. For reducing	167 // The search string we receive may contain escaped characters. For reducing

166 // the index we need individual, lower-cased words, ignoring escapings. For	168 // the index we need individual, lower-cased words, ignoring escapings. For

167 // the final filtering we need whitespace separated substrings possibly	169 // the final filtering we need whitespace separated substrings possibly

168 // containing escaped characters.	170 // containing escaped characters.

169 base::string16 lower_raw_string(base::i18n::ToLower(search_string));	171 base::string16 lower_raw_string(base::i18n::ToLower(search_string));

170 base::string16 lower_unescaped_string =	172 base::string16 lower_unescaped_string =

171 net::UnescapeURLComponent(lower_raw_string,	173 net::UnescapeURLComponent(lower_raw_string,

172 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);	174 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);

173 // Extract individual 'words' (as opposed to 'terms'; see below) from the	175 // Extract individual 'words' (as opposed to 'terms'; see below) from the

174 // search string. When the user types "colspec=ID%20Mstone Release" we get	176 // search string. When the user types "colspec=ID%20Mstone Release" we get

175 // four 'words': "colspec", "id", "mstone" and "release".	177 // four 'words': "colspec", "id", "mstone" and "release".

176 String16Vector lower_words(	178 String16Vector lower_words(

177 history::String16VectorFromString16(lower_unescaped_string, false, NULL));	179 String16VectorFromString16(lower_unescaped_string, false, NULL));

178 ScoredHistoryMatches scored_items;	180 ScoredHistoryMatches scored_items;

179	181

180 // Do nothing if we have indexed no words (probably because we've not been	182 // Do nothing if we have indexed no words (probably because we've not been

181 // initialized yet) or the search string has no words.	183 // initialized yet) or the search string has no words.

182 if (word_list_.empty() || lower_words.empty()) {	184 if (word_list_.empty() || lower_words.empty()) {

183 search_term_cache_.clear(); // Invalidate the term cache.	185 search_term_cache_.clear(); // Invalidate the term cache.

184 return scored_items;	186 return scored_items;

185 }	187 }

186	188

187 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep	189 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
222 // substring match, inserting those which pass in order by score. Note that	224 // substring match, inserting those which pass in order by score. Note that

223 // in this step we are using the raw search string complete with escaped	225 // in this step we are using the raw search string complete with escaped

224 // URL elements. When the user has specifically typed something akin to	226 // URL elements. When the user has specifically typed something akin to

225 // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that	227 // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that

226 // specific substring appears in the URL or page title.	228 // specific substring appears in the URL or page title.

227	229

228 // We call these 'terms' (as opposed to 'words'; see above) as in this case	230 // We call these 'terms' (as opposed to 'words'; see above) as in this case

229 // we only want to break up the search string on 'true' whitespace rather than	231 // we only want to break up the search string on 'true' whitespace rather than

230 // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we	232 // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we

231 // get two 'terms': "colspec=id%20mstone" and "release".	233 // get two 'terms': "colspec=id%20mstone" and "release".

232 history::String16Vector lower_raw_terms;	234 String16Vector lower_raw_terms;

233 if (Tokenize(lower_raw_string, base::kWhitespaceUTF16,	235 if (Tokenize(lower_raw_string, base::kWhitespaceUTF16,

234 &lower_raw_terms) == 0) {	236 &lower_raw_terms) == 0) {

235 // Don't score matches when there are no terms to score against. (It's	237 // Don't score matches when there are no terms to score against. (It's

236 // possible that the word break iterator that extracts words to search	238 // possible that the word break iterator that extracts words to search

237 // for in the database allows some whitespace "words" whereas Tokenize	239 // for in the database allows some whitespace "words" whereas Tokenize

238 // excludes a long list of whitespace.) One could write a scoring	240 // excludes a long list of whitespace.) One could write a scoring

239 // function that gives a reasonable order to matches when there	241 // function that gives a reasonable order to matches when there

240 // are no terms (i.e., all the words are some form of whitespace),	242 // are no terms (i.e., all the words are some form of whitespace),

241 // but this is such a rare edge case that it's not worth the time.	243 // but this is such a rare edge case that it's not worth the time.

242 return scored_items;	244 return scored_items;

(...skipping 29 matching lines...) Expand all Loading...
272 else	274 else

273 ++cache_iter;	275 ++cache_iter;

274 }	276 }

275 }	277 }

276	278

277 return scored_items;	279 return scored_items;

278 }	280 }

279	281

280 bool URLIndexPrivateData::UpdateURL(	282 bool URLIndexPrivateData::UpdateURL(

281 HistoryService* history_service,	283 HistoryService* history_service,

282 const URLRow& row,	284 const history::URLRow& row,

283 const std::string& languages,	285 const std::string& languages,

284 const std::set<std::string>& scheme_whitelist,	286 const std::set<std::string>& scheme_whitelist,

285 base::CancelableTaskTracker* tracker) {	287 base::CancelableTaskTracker* tracker) {

286 // The row may or may not already be in our index. If it is not already	288 // The row may or may not already be in our index. If it is not already

287 // indexed and it qualifies then it gets indexed. If it is already	289 // indexed and it qualifies then it gets indexed. If it is already

288 // indexed and still qualifies then it gets updated, otherwise it	290 // indexed and still qualifies then it gets updated, otherwise it

289 // is deleted from the index.	291 // is deleted from the index.

290 bool row_was_updated = false;	292 bool row_was_updated = false;

291 URLID row_id = row.id();	293 history::URLID row_id = row.id();

292 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);	294 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);

293 if (row_pos == history_info_map_.end()) {	295 if (row_pos == history_info_map_.end()) {

294 // This new row should be indexed if it qualifies.	296 // This new row should be indexed if it qualifies.

295 URLRow new_row(row);	297 history::URLRow new_row(row);

296 new_row.set_id(row_id);	298 new_row.set_id(row_id);

297 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&	299 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&

298 IndexRow(NULL,	300 IndexRow(NULL,

299 history_service,	301 history_service,

300 new_row,	302 new_row,

301 languages,	303 languages,

302 scheme_whitelist,	304 scheme_whitelist,

303 tracker);	305 tracker);

304 } else if (RowQualifiesAsSignificant(row, base::Time())) {	306 } else if (RowQualifiesAsSignificant(row, base::Time())) {

305 // This indexed row still qualifies and will be re-indexed.	307 // This indexed row still qualifies and will be re-indexed.

306 // The url won't have changed but the title, visit count, etc.	308 // The url won't have changed but the title, visit count, etc.

307 // might have changed.	309 // might have changed.

308 URLRow& row_to_update = row_pos->second.url_row;	310 history::URLRow& row_to_update = row_pos->second.url_row;

309 bool title_updated = row_to_update.title() != row.title();	311 bool title_updated = row_to_update.title() != row.title();

310 if (row_to_update.visit_count() != row.visit_count() ||	312 if (row_to_update.visit_count() != row.visit_count() ||

311 row_to_update.typed_count() != row.typed_count() ||	313 row_to_update.typed_count() != row.typed_count() ||

312 row_to_update.last_visit() != row.last_visit() || title_updated) {	314 row_to_update.last_visit() != row.last_visit() || title_updated) {

313 row_to_update.set_visit_count(row.visit_count());	315 row_to_update.set_visit_count(row.visit_count());

314 row_to_update.set_typed_count(row.typed_count());	316 row_to_update.set_typed_count(row.typed_count());

315 row_to_update.set_last_visit(row.last_visit());	317 row_to_update.set_last_visit(row.last_visit());

316 // If something appears to have changed, update the recent visits	318 // If something appears to have changed, update the recent visits

317 // information.	319 // information.

318 ScheduleUpdateRecentVisits(history_service, row_id, tracker);	320 ScheduleUpdateRecentVisits(history_service, row_id, tracker);

(...skipping 15 matching lines...) Expand all Loading...
334 // clearing all words associated with this row.	336 // clearing all words associated with this row.

335 RemoveRowFromIndex(row);	337 RemoveRowFromIndex(row);

336 row_was_updated = true;	338 row_was_updated = true;

337 }	339 }

338 if (row_was_updated)	340 if (row_was_updated)

339 search_term_cache_.clear(); // This invalidates the cache.	341 search_term_cache_.clear(); // This invalidates the cache.

340 return row_was_updated;	342 return row_was_updated;

341 }	343 }

342	344

343 void URLIndexPrivateData::UpdateRecentVisits(	345 void URLIndexPrivateData::UpdateRecentVisits(

344 URLID url_id,	346 history::URLID url_id,

345 const VisitVector& recent_visits) {	347 const history::VisitVector& recent_visits) {

346 HistoryInfoMap::iterator row_pos = history_info_map_.find(url_id);	348 HistoryInfoMap::iterator row_pos = history_info_map_.find(url_id);

347 if (row_pos != history_info_map_.end()) {	349 if (row_pos != history_info_map_.end()) {

348 VisitInfoVector* visits = &row_pos->second.visits;	350 VisitInfoVector* visits = &row_pos->second.visits;

349 visits->clear();	351 visits->clear();

350 const size_t size =	352 const size_t size =

351 std::min(recent_visits.size(), kMaxVisitsToStoreInCache);	353 std::min(recent_visits.size(), kMaxVisitsToStoreInCache);

352 visits->reserve(size);	354 visits->reserve(size);

353 for (size_t i = 0; i < size; i++) {	355 for (size_t i = 0; i < size; i++) {

354 // Copy from the VisitVector the only fields visits needs.	356 // Copy from the history::VisitVector the only fields visits needs.

355 visits->push_back(std::make_pair(recent_visits[i].visit_time,	357 visits->push_back(std::make_pair(recent_visits[i].visit_time,

356 recent_visits[i].transition));	358 recent_visits[i].transition));

357 }	359 }

358 }	360 }

359 // Else: Oddly, the URL doesn't seem to exist in the private index.	361 // Else: Oddly, the URL doesn't seem to exist in the private index.

360 // Ignore this update. This can happen if, for instance, the user	362 // Ignore this update. This can happen if, for instance, the user

361 // removes the URL from URLIndexPrivateData before the historyDB call	363 // removes the URL from URLIndexPrivateData before the historyDB call

362 // returns.	364 // returns.

363 }	365 }

364	366

365 void URLIndexPrivateData::ScheduleUpdateRecentVisits(	367 void URLIndexPrivateData::ScheduleUpdateRecentVisits(

366 HistoryService* history_service,	368 HistoryService* history_service,

367 URLID url_id,	369 history::URLID url_id,

368 base::CancelableTaskTracker* tracker) {	370 base::CancelableTaskTracker* tracker) {

369 history_service->ScheduleDBTask(	371 history_service->ScheduleDBTask(

370 scoped_ptr<history::HistoryDBTask>(	372 scoped_ptr<history::HistoryDBTask>(

371 new UpdateRecentVisitsFromHistoryDBTask(this, url_id)), tracker);	373 new UpdateRecentVisitsFromHistoryDBTask(this, url_id)), tracker);

372 }	374 }

373	375

374 // Helper functor for DeleteURL.	376 // Helper functor for DeleteURL.

375 class HistoryInfoMapItemHasURL {	377 class HistoryInfoMapItemHasURL {

376 public:	378 public:

377 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}	379 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}

(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
430 restored_data->word_map_.size());	432 restored_data->word_map_.size());

431 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",	433 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

432 restored_data->char_word_map_.size());	434 restored_data->char_word_map_.size());

433 if (restored_data->Empty())	435 if (restored_data->Empty())

434 return NULL; // 'No data' is the same as a failed reload.	436 return NULL; // 'No data' is the same as a failed reload.

435 return restored_data;	437 return restored_data;

436 }	438 }

437	439

438 // static	440 // static

439 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(	441 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(

440 HistoryDatabase* history_db,	442 history::HistoryDatabase* history_db,

441 const std::string& languages,	443 const std::string& languages,

442 const std::set<std::string>& scheme_whitelist) {	444 const std::set<std::string>& scheme_whitelist) {

443 if (!history_db)	445 if (!history_db)

444 return NULL;	446 return NULL;

445	447

446 base::TimeTicks beginning_time = base::TimeTicks::Now();	448 base::TimeTicks beginning_time = base::TimeTicks::Now();

447	449

448 scoped_refptr<URLIndexPrivateData>	450 scoped_refptr<URLIndexPrivateData>

449 rebuilt_data(new URLIndexPrivateData);	451 rebuilt_data(new URLIndexPrivateData);

450 URLDatabase::URLEnumerator history_enum;	452 history::URLDatabase::URLEnumerator history_enum;

451 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))	453 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))

452 return NULL;	454 return NULL;

453 rebuilt_data->last_time_rebuilt_from_history_ = base::Time::Now();	455 rebuilt_data->last_time_rebuilt_from_history_ = base::Time::Now();

454 for (URLRow row; history_enum.GetNextURL(&row); ) {	456 for (history::URLRow row; history_enum.GetNextURL(&row);) {

455 rebuilt_data->IndexRow(	457 rebuilt_data->IndexRow(

456 history_db, NULL, row, languages, scheme_whitelist, NULL);	458 history_db, NULL, row, languages, scheme_whitelist, NULL);

457 }	459 }

458	460

459 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",	461 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",

460 base::TimeTicks::Now() - beginning_time);	462 base::TimeTicks::Now() - beginning_time);

461 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",	463 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

462 rebuilt_data->history_id_word_map_.size());	464 rebuilt_data->history_id_word_map_.size());

463 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",	465 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

464 rebuilt_data->word_map_.size());	466 rebuilt_data->word_map_.size());

(...skipping 213 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
678 // Subsequent character results get intersected in.	680 // Subsequent character results get intersected in.

679 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(	681 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(

680 word_id_set, char_word_id_set);	682 word_id_set, char_word_id_set);

681 word_id_set.swap(new_word_id_set);	683 word_id_set.swap(new_word_id_set);

682 }	684 }

683 }	685 }

684 return word_id_set;	686 return word_id_set;

685 }	687 }

686	688

687 bool URLIndexPrivateData::IndexRow(	689 bool URLIndexPrivateData::IndexRow(

688 HistoryDatabase* history_db,	690 history::HistoryDatabase* history_db,

689 HistoryService* history_service,	691 HistoryService* history_service,

690 const URLRow& row,	692 const history::URLRow& row,

691 const std::string& languages,	693 const std::string& languages,

692 const std::set<std::string>& scheme_whitelist,	694 const std::set<std::string>& scheme_whitelist,

693 base::CancelableTaskTracker* tracker) {	695 base::CancelableTaskTracker* tracker) {

694 const GURL& gurl(row.url());	696 const GURL& gurl(row.url());

695	697

696 // Index only URLs with a whitelisted scheme.	698 // Index only URLs with a whitelisted scheme.

697 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))	699 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))

698 return false;	700 return false;

699	701

700 URLID row_id = row.id();	702 history::URLID row_id = row.id();

701 // Strip out username and password before saving and indexing.	703 // Strip out username and password before saving and indexing.

702 base::string16 url(net::FormatUrl(gurl, languages,	704 base::string16 url(net::FormatUrl(gurl, languages,

703 net::kFormatUrlOmitUsernamePassword,	705 net::kFormatUrlOmitUsernamePassword,

704 net::UnescapeRule::NONE,	706 net::UnescapeRule::NONE,

705 NULL, NULL, NULL));	707 NULL, NULL, NULL));

706	708

707 HistoryID history_id = static_cast<HistoryID>(row_id);	709 HistoryID history_id = static_cast<HistoryID>(row_id);

708 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());	710 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());

709	711

710 // Add the row for quick lookup in the history info store.	712 // Add the row for quick lookup in the history info store.

711 URLRow new_row(GURL(url), row_id);	713 history::URLRow new_row(GURL(url), row_id);

712 new_row.set_visit_count(row.visit_count());	714 new_row.set_visit_count(row.visit_count());

713 new_row.set_typed_count(row.typed_count());	715 new_row.set_typed_count(row.typed_count());

714 new_row.set_last_visit(row.last_visit());	716 new_row.set_last_visit(row.last_visit());

715 new_row.set_title(row.title());	717 new_row.set_title(row.title());

716 history_info_map_[history_id].url_row = new_row;	718 history_info_map_[history_id].url_row = new_row;

717	719

718 // Index the words contained in the URL and title of the row.	720 // Index the words contained in the URL and title of the row.

719 RowWordStarts word_starts;	721 RowWordStarts word_starts;

720 AddRowWordsToIndex(new_row, &word_starts, languages);	722 AddRowWordsToIndex(new_row, &word_starts, languages);

721 word_starts_map_[history_id] = word_starts;	723 word_starts_map_[history_id] = word_starts;

722	724

723 // Update the recent visits information or schedule the update	725 // Update the recent visits information or schedule the update

724 // as appropriate.	726 // as appropriate.

725 if (history_db) {	727 if (history_db) {

726 // We'd like to check that we're on the history DB thread.	728 // We'd like to check that we're on the history DB thread.

727 // However, unittest code actually calls this on the UI thread.	729 // However, unittest code actually calls this on the UI thread.

728 // So we don't do any thread checks.	730 // So we don't do any thread checks.

729 VisitVector recent_visits;	731 history::VisitVector recent_visits;

730 // Make sure the private data is going to get as many recent visits as	732 // Make sure the private data is going to get as many recent visits as

731 // ScoredHistoryMatch::GetFrequency() hopes to use.	733 // ScoredHistoryMatch::GetFrequency() hopes to use.

732 DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);	734 DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);

733 if (history_db->GetMostRecentVisitsForURL(row_id,	735 if (history_db->GetMostRecentVisitsForURL(row_id,

734 kMaxVisitsToStoreInCache,	736 kMaxVisitsToStoreInCache,

735 &recent_visits))	737 &recent_visits))

736 UpdateRecentVisits(row_id, recent_visits);	738 UpdateRecentVisits(row_id, recent_visits);

737 } else {	739 } else {

738 DCHECK(tracker);	740 DCHECK(tracker);

739 DCHECK(history_service);	741 DCHECK(history_service);

740 ScheduleUpdateRecentVisits(history_service, row_id, tracker);	742 ScheduleUpdateRecentVisits(history_service, row_id, tracker);

741 }	743 }

742	744

743 return true;	745 return true;

744 }	746 }

745	747

746 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,	748 void URLIndexPrivateData::AddRowWordsToIndex(const history::URLRow& row,

747 RowWordStarts* word_starts,	749 RowWordStarts* word_starts,

748 const std::string& languages) {	750 const std::string& languages) {

749 HistoryID history_id = static_cast<HistoryID>(row.id());	751 HistoryID history_id = static_cast<HistoryID>(row.id());

750 // Split URL into individual, unique words then add in the title words.	752 // Split URL into individual, unique words then add in the title words.

751 const GURL& gurl(row.url());	753 const GURL& gurl(row.url());

752 const base::string16& url =	754 const base::string16& url =

753 bookmarks::CleanUpUrlForMatching(gurl, languages, NULL);	755 bookmarks::CleanUpUrlForMatching(gurl, languages, NULL);

754 String16Set url_words = String16SetFromString16(url,	756 String16Set url_words = String16SetFromString16(url,

755 word_starts ? &word_starts->url_word_starts_ : NULL);	757 word_starts ? &word_starts->url_word_starts_ : NULL);

756 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title());	758 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title());

(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
825 if (iter != history_id_word_map_.end()) {	827 if (iter != history_id_word_map_.end()) {

826 WordIDSet& word_id_set(iter->second);	828 WordIDSet& word_id_set(iter->second);

827 word_id_set.insert(word_id);	829 word_id_set.insert(word_id);

828 } else {	830 } else {

829 WordIDSet word_id_set;	831 WordIDSet word_id_set;

830 word_id_set.insert(word_id);	832 word_id_set.insert(word_id);

831 history_id_word_map_[history_id] = word_id_set;	833 history_id_word_map_[history_id] = word_id_set;

832 }	834 }

833 }	835 }

834	836

835 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) {	837 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) {

836 RemoveRowWordsFromIndex(row);	838 RemoveRowWordsFromIndex(row);

837 HistoryID history_id = static_cast<HistoryID>(row.id());	839 HistoryID history_id = static_cast<HistoryID>(row.id());

838 history_info_map_.erase(history_id);	840 history_info_map_.erase(history_id);

839 word_starts_map_.erase(history_id);	841 word_starts_map_.erase(history_id);

840 }	842 }

841	843

842 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {	844 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) {

843 // Remove the entries in history_id_word_map_ and word_id_history_map_ for	845 // Remove the entries in history_id_word_map_ and word_id_history_map_ for

844 // this row.	846 // this row.

845 HistoryID history_id = static_cast<HistoryID>(row.id());	847 HistoryID history_id = static_cast<HistoryID>(row.id());

846 WordIDSet word_id_set = history_id_word_map_[history_id];	848 WordIDSet word_id_set = history_id_word_map_[history_id];

847 history_id_word_map_.erase(history_id);	849 history_id_word_map_.erase(history_id);

848	850

849 // Reconcile any changes to word usage.	851 // Reconcile any changes to word usage.

850 for (WordIDSet::iterator word_id_iter = word_id_set.begin();	852 for (WordIDSet::iterator word_id_iter = word_id_set.begin();

851 word_id_iter != word_id_set.end(); ++word_id_iter) {	853 word_id_iter != word_id_set.end(); ++word_id_iter) {

852 WordID word_id = *word_id_iter;	854 WordID word_id = *word_id_iter;

(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
979 void URLIndexPrivateData::SaveHistoryInfoMap(	981 void URLIndexPrivateData::SaveHistoryInfoMap(

980 InMemoryURLIndexCacheItem* cache) const {	982 InMemoryURLIndexCacheItem* cache) const {

981 if (history_info_map_.empty())	983 if (history_info_map_.empty())

982 return;	984 return;

983 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();	985 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();

984 map_item->set_item_count(history_info_map_.size());	986 map_item->set_item_count(history_info_map_.size());

985 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();	987 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

986 iter != history_info_map_.end(); ++iter) {	988 iter != history_info_map_.end(); ++iter) {

987 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();	989 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();

988 map_entry->set_history_id(iter->first);	990 map_entry->set_history_id(iter->first);

989 const URLRow& url_row(iter->second.url_row);	991 const history::URLRow& url_row(iter->second.url_row);

990 // Note: We only save information that contributes to the index so there	992 // Note: We only save information that contributes to the index so there

991 // is no need to save search_term_cache_ (not persistent).	993 // is no need to save search_term_cache_ (not persistent).

992 map_entry->set_visit_count(url_row.visit_count());	994 map_entry->set_visit_count(url_row.visit_count());

993 map_entry->set_typed_count(url_row.typed_count());	995 map_entry->set_typed_count(url_row.typed_count());

994 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());	996 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());

995 map_entry->set_url(url_row.url().spec());	997 map_entry->set_url(url_row.url().spec());

996 map_entry->set_title(base::UTF16ToUTF8(url_row.title()));	998 map_entry->set_title(base::UTF16ToUTF8(url_row.title()));

997 const VisitInfoVector& visits(iter->second.visits);	999 const VisitInfoVector& visits(iter->second.visits);

998 for (VisitInfoVector::const_iterator visit_iter = visits.begin();	1000 for (VisitInfoVector::const_iterator visit_iter = visits.begin();

999 visit_iter != visits.end(); ++visit_iter) {	1001 visit_iter != visits.end(); ++visit_iter) {

(...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1160 uint32 expected_item_count = list_item.item_count();	1162 uint32 expected_item_count = list_item.item_count();

1161 uint32 actual_item_count = list_item.history_info_map_entry_size();	1163 uint32 actual_item_count = list_item.history_info_map_entry_size();

1162 if (actual_item_count == 0 || actual_item_count != expected_item_count)	1164 if (actual_item_count == 0 || actual_item_count != expected_item_count)

1163 return false;	1165 return false;

1164 const RepeatedPtrField<HistoryInfoMapEntry>&	1166 const RepeatedPtrField<HistoryInfoMapEntry>&

1165 entries(list_item.history_info_map_entry());	1167 entries(list_item.history_info_map_entry());

1166 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =	1168 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =

1167 entries.begin(); iter != entries.end(); ++iter) {	1169 entries.begin(); iter != entries.end(); ++iter) {

1168 HistoryID history_id = iter->history_id();	1170 HistoryID history_id = iter->history_id();

1169 GURL url(iter->url());	1171 GURL url(iter->url());

1170 URLRow url_row(url, history_id);	1172 history::URLRow url_row(url, history_id);

1171 url_row.set_visit_count(iter->visit_count());	1173 url_row.set_visit_count(iter->visit_count());

1172 url_row.set_typed_count(iter->typed_count());	1174 url_row.set_typed_count(iter->typed_count());

1173 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));	1175 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));

1174 if (iter->has_title()) {	1176 if (iter->has_title()) {

1175 base::string16 title(base::UTF8ToUTF16(iter->title()));	1177 base::string16 title(base::UTF8ToUTF16(iter->title()));

1176 url_row.set_title(title);	1178 url_row.set_title(title);

1177 }	1179 }

1178 history_info_map_[history_id].url_row = url_row;	1180 history_info_map_[history_id].url_row = url_row;

1179	1181

1180 // Restore visits list.	1182 // Restore visits list.

(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1219 jiter != title_starts.end(); ++jiter)	1221 jiter != title_starts.end(); ++jiter)

1220 word_starts.title_word_starts_.push_back(*jiter);	1222 word_starts.title_word_starts_.push_back(*jiter);

1221 word_starts_map_[history_id] = word_starts;	1223 word_starts_map_[history_id] = word_starts;

1222 }	1224 }

1223 } else {	1225 } else {

1224 // Since the cache did not contain any word starts we must rebuild them from	1226 // Since the cache did not contain any word starts we must rebuild them from

1225 // the URL and page titles.	1227 // the URL and page titles.

1226 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();	1228 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

1227 iter != history_info_map_.end(); ++iter) {	1229 iter != history_info_map_.end(); ++iter) {

1228 RowWordStarts word_starts;	1230 RowWordStarts word_starts;

1229 const URLRow& row(iter->second.url_row);	1231 const history::URLRow& row(iter->second.url_row);

1230 const base::string16& url =	1232 const base::string16& url =

1231 bookmarks::CleanUpUrlForMatching(row.url(), languages, NULL);	1233 bookmarks::CleanUpUrlForMatching(row.url(), languages, NULL);

1232 String16VectorFromString16(url, false, &word_starts.url_word_starts_);	1234 String16VectorFromString16(url, false, &word_starts.url_word_starts_);

1233 const base::string16& title =	1235 const base::string16& title =

1234 bookmarks::CleanUpTitleForMatching(row.title());	1236 bookmarks::CleanUpTitleForMatching(row.title());

1235 String16VectorFromString16(title, false, &word_starts.title_word_starts_);	1237 String16VectorFromString16(title, false, &word_starts.title_word_starts_);

1236 word_starts_map_[iter->first] = word_starts;	1238 word_starts_map_[iter->first] = word_starts;

1237 }	1239 }

1238 }	1240 }

1239 return true;	1241 return true;

(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1295 }	1297 }

1296 }	1298 }

1297	1299

1298 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}	1300 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}

1299	1301

1300 void URLIndexPrivateData::AddHistoryMatch::operator()(	1302 void URLIndexPrivateData::AddHistoryMatch::operator()(

1301 const HistoryID history_id) {	1303 const HistoryID history_id) {

1302 HistoryInfoMap::const_iterator hist_pos =	1304 HistoryInfoMap::const_iterator hist_pos =

1303 private_data_.history_info_map_.find(history_id);	1305 private_data_.history_info_map_.find(history_id);

1304 if (hist_pos != private_data_.history_info_map_.end()) {	1306 if (hist_pos != private_data_.history_info_map_.end()) {

1305 const URLRow& hist_item = hist_pos->second.url_row;	1307 const history::URLRow& hist_item = hist_pos->second.url_row;

1306 const VisitInfoVector& visits = hist_pos->second.visits;	1308 const VisitInfoVector& visits = hist_pos->second.visits;

1307 WordStartsMap::const_iterator starts_pos =	1309 WordStartsMap::const_iterator starts_pos =

1308 private_data_.word_starts_map_.find(history_id);	1310 private_data_.word_starts_map_.find(history_id);

1309 DCHECK(starts_pos != private_data_.word_starts_map_.end());	1311 DCHECK(starts_pos != private_data_.word_starts_map_.end());

1310 ScoredHistoryMatch match = builder_.Build(	1312 ScoredHistoryMatch match = builder_.Build(

1311 hist_item, visits, languages_, lower_string_, lower_terms_,	1313 hist_item, visits, languages_, lower_string_, lower_terms_,

1312 lower_terms_to_word_starts_offsets_, starts_pos->second, now_);	1314 lower_terms_to_word_starts_offsets_, starts_pos->second, now_);

1313 if (match.raw_score > 0)	1315 if (match.raw_score > 0)

1314 scored_matches_.push_back(match);	1316 scored_matches_.push_back(match);

1315 }	1317 }

(...skipping 11 matching lines...) Expand all Loading...
1327	1329

1328 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(	1330 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(

1329 const HistoryID h1,	1331 const HistoryID h1,

1330 const HistoryID h2) {	1332 const HistoryID h2) {

1331 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));	1333 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));

1332 if (entry1 == history_info_map_.end())	1334 if (entry1 == history_info_map_.end())

1333 return false;	1335 return false;

1334 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));	1336 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));

1335 if (entry2 == history_info_map_.end())	1337 if (entry2 == history_info_map_.end())

1336 return true;	1338 return true;

1337 const URLRow& r1(entry1->second.url_row);	1339 const history::URLRow& r1(entry1->second.url_row);

1338 const URLRow& r2(entry2->second.url_row);	1340 const history::URLRow& r2(entry2->second.url_row);

1339 // First cut: typed count, visit count, recency.	1341 // First cut: typed count, visit count, recency.

1340 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks	1342 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks

1341 // recently visited (within the last 12/24 hours) as highly important. Get	1343 // recently visited (within the last 12/24 hours) as highly important. Get

1342 // input from mpearson.	1344 // input from mpearson.

1343 if (r1.typed_count() != r2.typed_count())	1345 if (r1.typed_count() != r2.typed_count())

1344 return (r1.typed_count() > r2.typed_count());	1346 return (r1.typed_count() > r2.typed_count());

1345 if (r1.visit_count() != r2.visit_count())	1347 if (r1.visit_count() != r2.visit_count())

1346 return (r1.visit_count() > r2.visit_count());	1348 return (r1.visit_count() > r2.visit_count());

1347 return (r1.last_visit() > r2.last_visit());	1349 return (r1.last_visit() > r2.last_visit());

1348 }	1350 }

1349

1350 } // namespace history

OLD	NEW

« no previous file with comments | « chrome/browser/autocomplete/url_index_private_data.h ('k') | chrome/browser/history/history_service.h » ('j') | no next file with comments »