Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(954)

Side by Side Diff: chrome/browser/autocomplete/url_index_private_data.cc

Issue 959343004: Move InMemoryURLIndex outside of history namespace (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@in-memory-url-index
Patch Set: Rebase Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/autocomplete/url_index_private_data.h" 5 #include "chrome/browser/autocomplete/url_index_private_data.h"
6 6
7 #include <functional> 7 #include <functional>
8 #include <iterator> 8 #include <iterator>
9 #include <limits> 9 #include <limits>
10 #include <numeric> 10 #include <numeric>
(...skipping 22 matching lines...) Expand all
33 #endif 33 #endif
34 34
35 using google::protobuf::RepeatedField; 35 using google::protobuf::RepeatedField;
36 using google::protobuf::RepeatedPtrField; 36 using google::protobuf::RepeatedPtrField;
37 using in_memory_url_index::InMemoryURLIndexCacheItem; 37 using in_memory_url_index::InMemoryURLIndexCacheItem;
38 38
39 namespace { 39 namespace {
40 static const size_t kMaxVisitsToStoreInCache = 10u; 40 static const size_t kMaxVisitsToStoreInCache = 10u;
41 } // anonymous namespace 41 } // anonymous namespace
42 42
43 namespace history { 43 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem
44 44 WordListItem;
45 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; 45 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry
46 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry; 46 WordMapEntry;
47 typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; 47 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;
48 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem; 48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem
49 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry 49 CharWordMapItem;
50 CharWordMapEntry; 50 typedef in_memory_url_index::
51 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem 51 InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry CharWordMapEntry;
52 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordIDHistoryMapItem
52 WordIDHistoryMapItem; 53 WordIDHistoryMapItem;
53 typedef imui:: 54 typedef in_memory_url_index::
54 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry 55 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry
55 WordIDHistoryMapEntry; 56 WordIDHistoryMapEntry;
56 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; 57 typedef in_memory_url_index::InMemoryURLIndexCacheItem_HistoryInfoMapItem
57 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry 58 HistoryInfoMapItem;
58 HistoryInfoMapEntry; 59 typedef in_memory_url_index::
59 typedef imui:: 60 InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry
61 HistoryInfoMapEntry;
62 typedef in_memory_url_index::
60 InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry_VisitInfo 63 InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry_VisitInfo
61 HistoryInfoMapEntry_VisitInfo; 64 HistoryInfoMapEntry_VisitInfo;
62 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem; 65 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordStartsMapItem
63 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry 66 WordStartsMapItem;
64 WordStartsMapEntry; 67 typedef in_memory_url_index::
65 68 InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry
69 WordStartsMapEntry;
66 70
67 // Algorithm Functions --------------------------------------------------------- 71 // Algorithm Functions ---------------------------------------------------------
68 72
69 // Comparison function for sorting search terms by descending length. 73 // Comparison function for sorting search terms by descending length.
70 bool LengthGreater(const base::string16& string_a, 74 bool LengthGreater(const base::string16& string_a,
71 const base::string16& string_b) { 75 const base::string16& string_b) {
72 return string_a.length() > string_b.length(); 76 return string_a.length() > string_b.length();
73 } 77 }
74 78
75 79
76 // UpdateRecentVisitsFromHistoryDBTask ----------------------------------------- 80 // UpdateRecentVisitsFromHistoryDBTask -----------------------------------------
77 81
78 // HistoryDBTask used to update the recent visit data for a particular 82 // HistoryDBTask used to update the recent visit data for a particular
79 // row from the history database. 83 // row from the history database.
80 class UpdateRecentVisitsFromHistoryDBTask : public HistoryDBTask { 84 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask {
81 public: 85 public:
82 explicit UpdateRecentVisitsFromHistoryDBTask( 86 explicit UpdateRecentVisitsFromHistoryDBTask(
83 URLIndexPrivateData* private_data, 87 URLIndexPrivateData* private_data,
84 URLID url_id); 88 history::URLID url_id);
85 89
86 bool RunOnDBThread(HistoryBackend* backend, 90 bool RunOnDBThread(history::HistoryBackend* backend,
87 history::HistoryDatabase* db) override; 91 history::HistoryDatabase* db) override;
88 void DoneRunOnMainThread() override; 92 void DoneRunOnMainThread() override;
89 93
90 private: 94 private:
91 ~UpdateRecentVisitsFromHistoryDBTask() override; 95 ~UpdateRecentVisitsFromHistoryDBTask() override;
92 96
93 // The URLIndexPrivateData that gets updated after the historyDB 97 // The URLIndexPrivateData that gets updated after the historyDB
94 // task returns. 98 // task returns.
95 URLIndexPrivateData* private_data_; 99 URLIndexPrivateData* private_data_;
96 // The ID of the URL to get visits for and then update. 100 // The ID of the URL to get visits for and then update.
97 URLID url_id_; 101 history::URLID url_id_;
98 // Whether fetching the recent visits for the URL succeeded. 102 // Whether fetching the recent visits for the URL succeeded.
99 bool succeeded_; 103 bool succeeded_;
100 // The awaited data that's shown to private_data_ for it to copy and 104 // The awaited data that's shown to private_data_ for it to copy and
101 // store. 105 // store.
102 VisitVector recent_visits_; 106 history::VisitVector recent_visits_;
103 107
104 DISALLOW_COPY_AND_ASSIGN(UpdateRecentVisitsFromHistoryDBTask); 108 DISALLOW_COPY_AND_ASSIGN(UpdateRecentVisitsFromHistoryDBTask);
105 }; 109 };
106 110
107 UpdateRecentVisitsFromHistoryDBTask::UpdateRecentVisitsFromHistoryDBTask( 111 UpdateRecentVisitsFromHistoryDBTask::UpdateRecentVisitsFromHistoryDBTask(
108 URLIndexPrivateData* private_data, 112 URLIndexPrivateData* private_data,
109 URLID url_id) 113 history::URLID url_id)
110 : private_data_(private_data), 114 : private_data_(private_data), url_id_(url_id), succeeded_(false) {
111 url_id_(url_id),
112 succeeded_(false) {
113 } 115 }
114 116
115 bool UpdateRecentVisitsFromHistoryDBTask::RunOnDBThread( 117 bool UpdateRecentVisitsFromHistoryDBTask::RunOnDBThread(
116 HistoryBackend* backend, 118 history::HistoryBackend* backend,
117 HistoryDatabase* db) { 119 history::HistoryDatabase* db) {
118 // Make sure the private data is going to get as many recent visits as 120 // Make sure the private data is going to get as many recent visits as
119 // ScoredHistoryMatch::GetFrequency() hopes to use. 121 // ScoredHistoryMatch::GetFrequency() hopes to use.
120 DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore); 122 DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);
121 succeeded_ = db->GetMostRecentVisitsForURL(url_id_, 123 succeeded_ = db->GetMostRecentVisitsForURL(url_id_,
122 kMaxVisitsToStoreInCache, 124 kMaxVisitsToStoreInCache,
123 &recent_visits_); 125 &recent_visits_);
124 if (!succeeded_) 126 if (!succeeded_)
125 recent_visits_.clear(); 127 recent_visits_.clear();
126 return true; // Always claim to be done; do not retry failures. 128 return true; // Always claim to be done; do not retry failures.
127 } 129 }
(...skipping 15 matching lines...) Expand all
143 pre_filter_item_count_(0), 145 pre_filter_item_count_(0),
144 post_filter_item_count_(0), 146 post_filter_item_count_(0),
145 post_scoring_item_count_(0) { 147 post_scoring_item_count_(0) {
146 } 148 }
147 149
148 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( 150 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
149 base::string16 search_string, 151 base::string16 search_string,
150 size_t cursor_position, 152 size_t cursor_position,
151 size_t max_matches, 153 size_t max_matches,
152 const std::string& languages, 154 const std::string& languages,
153 const history::ScoredHistoryMatch::Builder& builder) { 155 const ScoredHistoryMatch::Builder& builder) {
154 // If cursor position is set and useful (not at either end of the 156 // If cursor position is set and useful (not at either end of the
155 // string), allow the search string to be broken at cursor position. 157 // string), allow the search string to be broken at cursor position.
156 // We do this by pretending there's a space where the cursor is. 158 // We do this by pretending there's a space where the cursor is.
157 if ((cursor_position != base::string16::npos) && 159 if ((cursor_position != base::string16::npos) &&
158 (cursor_position < search_string.length()) && 160 (cursor_position < search_string.length()) &&
159 (cursor_position > 0)) { 161 (cursor_position > 0)) {
160 search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); 162 search_string.insert(cursor_position, base::ASCIIToUTF16(" "));
161 } 163 }
162 pre_filter_item_count_ = 0; 164 pre_filter_item_count_ = 0;
163 post_filter_item_count_ = 0; 165 post_filter_item_count_ = 0;
164 post_scoring_item_count_ = 0; 166 post_scoring_item_count_ = 0;
165 // The search string we receive may contain escaped characters. For reducing 167 // The search string we receive may contain escaped characters. For reducing
166 // the index we need individual, lower-cased words, ignoring escapings. For 168 // the index we need individual, lower-cased words, ignoring escapings. For
167 // the final filtering we need whitespace separated substrings possibly 169 // the final filtering we need whitespace separated substrings possibly
168 // containing escaped characters. 170 // containing escaped characters.
169 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); 171 base::string16 lower_raw_string(base::i18n::ToLower(search_string));
170 base::string16 lower_unescaped_string = 172 base::string16 lower_unescaped_string =
171 net::UnescapeURLComponent(lower_raw_string, 173 net::UnescapeURLComponent(lower_raw_string,
172 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); 174 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);
173 // Extract individual 'words' (as opposed to 'terms'; see below) from the 175 // Extract individual 'words' (as opposed to 'terms'; see below) from the
174 // search string. When the user types "colspec=ID%20Mstone Release" we get 176 // search string. When the user types "colspec=ID%20Mstone Release" we get
175 // four 'words': "colspec", "id", "mstone" and "release". 177 // four 'words': "colspec", "id", "mstone" and "release".
176 String16Vector lower_words( 178 String16Vector lower_words(
177 history::String16VectorFromString16(lower_unescaped_string, false, NULL)); 179 String16VectorFromString16(lower_unescaped_string, false, NULL));
178 ScoredHistoryMatches scored_items; 180 ScoredHistoryMatches scored_items;
179 181
180 // Do nothing if we have indexed no words (probably because we've not been 182 // Do nothing if we have indexed no words (probably because we've not been
181 // initialized yet) or the search string has no words. 183 // initialized yet) or the search string has no words.
182 if (word_list_.empty() || lower_words.empty()) { 184 if (word_list_.empty() || lower_words.empty()) {
183 search_term_cache_.clear(); // Invalidate the term cache. 185 search_term_cache_.clear(); // Invalidate the term cache.
184 return scored_items; 186 return scored_items;
185 } 187 }
186 188
187 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep 189 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
222 // substring match, inserting those which pass in order by score. Note that 224 // substring match, inserting those which pass in order by score. Note that
223 // in this step we are using the raw search string complete with escaped 225 // in this step we are using the raw search string complete with escaped
224 // URL elements. When the user has specifically typed something akin to 226 // URL elements. When the user has specifically typed something akin to
225 // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that 227 // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that
226 // specific substring appears in the URL or page title. 228 // specific substring appears in the URL or page title.
227 229
228 // We call these 'terms' (as opposed to 'words'; see above) as in this case 230 // We call these 'terms' (as opposed to 'words'; see above) as in this case
229 // we only want to break up the search string on 'true' whitespace rather than 231 // we only want to break up the search string on 'true' whitespace rather than
230 // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we 232 // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we
231 // get two 'terms': "colspec=id%20mstone" and "release". 233 // get two 'terms': "colspec=id%20mstone" and "release".
232 history::String16Vector lower_raw_terms; 234 String16Vector lower_raw_terms;
233 if (Tokenize(lower_raw_string, base::kWhitespaceUTF16, 235 if (Tokenize(lower_raw_string, base::kWhitespaceUTF16,
234 &lower_raw_terms) == 0) { 236 &lower_raw_terms) == 0) {
235 // Don't score matches when there are no terms to score against. (It's 237 // Don't score matches when there are no terms to score against. (It's
236 // possible that the word break iterator that extracts words to search 238 // possible that the word break iterator that extracts words to search
237 // for in the database allows some whitespace "words" whereas Tokenize 239 // for in the database allows some whitespace "words" whereas Tokenize
238 // excludes a long list of whitespace.) One could write a scoring 240 // excludes a long list of whitespace.) One could write a scoring
239 // function that gives a reasonable order to matches when there 241 // function that gives a reasonable order to matches when there
240 // are no terms (i.e., all the words are some form of whitespace), 242 // are no terms (i.e., all the words are some form of whitespace),
241 // but this is such a rare edge case that it's not worth the time. 243 // but this is such a rare edge case that it's not worth the time.
242 return scored_items; 244 return scored_items;
(...skipping 29 matching lines...) Expand all
272 else 274 else
273 ++cache_iter; 275 ++cache_iter;
274 } 276 }
275 } 277 }
276 278
277 return scored_items; 279 return scored_items;
278 } 280 }
279 281
280 bool URLIndexPrivateData::UpdateURL( 282 bool URLIndexPrivateData::UpdateURL(
281 HistoryService* history_service, 283 HistoryService* history_service,
282 const URLRow& row, 284 const history::URLRow& row,
283 const std::string& languages, 285 const std::string& languages,
284 const std::set<std::string>& scheme_whitelist, 286 const std::set<std::string>& scheme_whitelist,
285 base::CancelableTaskTracker* tracker) { 287 base::CancelableTaskTracker* tracker) {
286 // The row may or may not already be in our index. If it is not already 288 // The row may or may not already be in our index. If it is not already
287 // indexed and it qualifies then it gets indexed. If it is already 289 // indexed and it qualifies then it gets indexed. If it is already
288 // indexed and still qualifies then it gets updated, otherwise it 290 // indexed and still qualifies then it gets updated, otherwise it
289 // is deleted from the index. 291 // is deleted from the index.
290 bool row_was_updated = false; 292 bool row_was_updated = false;
291 URLID row_id = row.id(); 293 history::URLID row_id = row.id();
292 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); 294 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
293 if (row_pos == history_info_map_.end()) { 295 if (row_pos == history_info_map_.end()) {
294 // This new row should be indexed if it qualifies. 296 // This new row should be indexed if it qualifies.
295 URLRow new_row(row); 297 history::URLRow new_row(row);
296 new_row.set_id(row_id); 298 new_row.set_id(row_id);
297 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) && 299 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&
298 IndexRow(NULL, 300 IndexRow(NULL,
299 history_service, 301 history_service,
300 new_row, 302 new_row,
301 languages, 303 languages,
302 scheme_whitelist, 304 scheme_whitelist,
303 tracker); 305 tracker);
304 } else if (RowQualifiesAsSignificant(row, base::Time())) { 306 } else if (RowQualifiesAsSignificant(row, base::Time())) {
305 // This indexed row still qualifies and will be re-indexed. 307 // This indexed row still qualifies and will be re-indexed.
306 // The url won't have changed but the title, visit count, etc. 308 // The url won't have changed but the title, visit count, etc.
307 // might have changed. 309 // might have changed.
308 URLRow& row_to_update = row_pos->second.url_row; 310 history::URLRow& row_to_update = row_pos->second.url_row;
309 bool title_updated = row_to_update.title() != row.title(); 311 bool title_updated = row_to_update.title() != row.title();
310 if (row_to_update.visit_count() != row.visit_count() || 312 if (row_to_update.visit_count() != row.visit_count() ||
311 row_to_update.typed_count() != row.typed_count() || 313 row_to_update.typed_count() != row.typed_count() ||
312 row_to_update.last_visit() != row.last_visit() || title_updated) { 314 row_to_update.last_visit() != row.last_visit() || title_updated) {
313 row_to_update.set_visit_count(row.visit_count()); 315 row_to_update.set_visit_count(row.visit_count());
314 row_to_update.set_typed_count(row.typed_count()); 316 row_to_update.set_typed_count(row.typed_count());
315 row_to_update.set_last_visit(row.last_visit()); 317 row_to_update.set_last_visit(row.last_visit());
316 // If something appears to have changed, update the recent visits 318 // If something appears to have changed, update the recent visits
317 // information. 319 // information.
318 ScheduleUpdateRecentVisits(history_service, row_id, tracker); 320 ScheduleUpdateRecentVisits(history_service, row_id, tracker);
(...skipping 15 matching lines...) Expand all
334 // clearing all words associated with this row. 336 // clearing all words associated with this row.
335 RemoveRowFromIndex(row); 337 RemoveRowFromIndex(row);
336 row_was_updated = true; 338 row_was_updated = true;
337 } 339 }
338 if (row_was_updated) 340 if (row_was_updated)
339 search_term_cache_.clear(); // This invalidates the cache. 341 search_term_cache_.clear(); // This invalidates the cache.
340 return row_was_updated; 342 return row_was_updated;
341 } 343 }
342 344
343 void URLIndexPrivateData::UpdateRecentVisits( 345 void URLIndexPrivateData::UpdateRecentVisits(
344 URLID url_id, 346 history::URLID url_id,
345 const VisitVector& recent_visits) { 347 const history::VisitVector& recent_visits) {
346 HistoryInfoMap::iterator row_pos = history_info_map_.find(url_id); 348 HistoryInfoMap::iterator row_pos = history_info_map_.find(url_id);
347 if (row_pos != history_info_map_.end()) { 349 if (row_pos != history_info_map_.end()) {
348 VisitInfoVector* visits = &row_pos->second.visits; 350 VisitInfoVector* visits = &row_pos->second.visits;
349 visits->clear(); 351 visits->clear();
350 const size_t size = 352 const size_t size =
351 std::min(recent_visits.size(), kMaxVisitsToStoreInCache); 353 std::min(recent_visits.size(), kMaxVisitsToStoreInCache);
352 visits->reserve(size); 354 visits->reserve(size);
353 for (size_t i = 0; i < size; i++) { 355 for (size_t i = 0; i < size; i++) {
354 // Copy from the VisitVector the only fields visits needs. 356 // Copy from the history::VisitVector the only fields visits needs.
355 visits->push_back(std::make_pair(recent_visits[i].visit_time, 357 visits->push_back(std::make_pair(recent_visits[i].visit_time,
356 recent_visits[i].transition)); 358 recent_visits[i].transition));
357 } 359 }
358 } 360 }
359 // Else: Oddly, the URL doesn't seem to exist in the private index. 361 // Else: Oddly, the URL doesn't seem to exist in the private index.
360 // Ignore this update. This can happen if, for instance, the user 362 // Ignore this update. This can happen if, for instance, the user
361 // removes the URL from URLIndexPrivateData before the historyDB call 363 // removes the URL from URLIndexPrivateData before the historyDB call
362 // returns. 364 // returns.
363 } 365 }
364 366
365 void URLIndexPrivateData::ScheduleUpdateRecentVisits( 367 void URLIndexPrivateData::ScheduleUpdateRecentVisits(
366 HistoryService* history_service, 368 HistoryService* history_service,
367 URLID url_id, 369 history::URLID url_id,
368 base::CancelableTaskTracker* tracker) { 370 base::CancelableTaskTracker* tracker) {
369 history_service->ScheduleDBTask( 371 history_service->ScheduleDBTask(
370 scoped_ptr<history::HistoryDBTask>( 372 scoped_ptr<history::HistoryDBTask>(
371 new UpdateRecentVisitsFromHistoryDBTask(this, url_id)), tracker); 373 new UpdateRecentVisitsFromHistoryDBTask(this, url_id)), tracker);
372 } 374 }
373 375
374 // Helper functor for DeleteURL. 376 // Helper functor for DeleteURL.
375 class HistoryInfoMapItemHasURL { 377 class HistoryInfoMapItemHasURL {
376 public: 378 public:
377 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {} 379 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
430 restored_data->word_map_.size()); 432 restored_data->word_map_.size());
431 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", 433 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
432 restored_data->char_word_map_.size()); 434 restored_data->char_word_map_.size());
433 if (restored_data->Empty()) 435 if (restored_data->Empty())
434 return NULL; // 'No data' is the same as a failed reload. 436 return NULL; // 'No data' is the same as a failed reload.
435 return restored_data; 437 return restored_data;
436 } 438 }
437 439
438 // static 440 // static
439 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory( 441 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(
440 HistoryDatabase* history_db, 442 history::HistoryDatabase* history_db,
441 const std::string& languages, 443 const std::string& languages,
442 const std::set<std::string>& scheme_whitelist) { 444 const std::set<std::string>& scheme_whitelist) {
443 if (!history_db) 445 if (!history_db)
444 return NULL; 446 return NULL;
445 447
446 base::TimeTicks beginning_time = base::TimeTicks::Now(); 448 base::TimeTicks beginning_time = base::TimeTicks::Now();
447 449
448 scoped_refptr<URLIndexPrivateData> 450 scoped_refptr<URLIndexPrivateData>
449 rebuilt_data(new URLIndexPrivateData); 451 rebuilt_data(new URLIndexPrivateData);
450 URLDatabase::URLEnumerator history_enum; 452 history::URLDatabase::URLEnumerator history_enum;
451 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) 453 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
452 return NULL; 454 return NULL;
453 rebuilt_data->last_time_rebuilt_from_history_ = base::Time::Now(); 455 rebuilt_data->last_time_rebuilt_from_history_ = base::Time::Now();
454 for (URLRow row; history_enum.GetNextURL(&row); ) { 456 for (history::URLRow row; history_enum.GetNextURL(&row);) {
455 rebuilt_data->IndexRow( 457 rebuilt_data->IndexRow(
456 history_db, NULL, row, languages, scheme_whitelist, NULL); 458 history_db, NULL, row, languages, scheme_whitelist, NULL);
457 } 459 }
458 460
459 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", 461 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
460 base::TimeTicks::Now() - beginning_time); 462 base::TimeTicks::Now() - beginning_time);
461 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", 463 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
462 rebuilt_data->history_id_word_map_.size()); 464 rebuilt_data->history_id_word_map_.size());
463 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", 465 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
464 rebuilt_data->word_map_.size()); 466 rebuilt_data->word_map_.size());
(...skipping 213 matching lines...) Expand 10 before | Expand all | Expand 10 after
678 // Subsequent character results get intersected in. 680 // Subsequent character results get intersected in.
679 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>( 681 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(
680 word_id_set, char_word_id_set); 682 word_id_set, char_word_id_set);
681 word_id_set.swap(new_word_id_set); 683 word_id_set.swap(new_word_id_set);
682 } 684 }
683 } 685 }
684 return word_id_set; 686 return word_id_set;
685 } 687 }
686 688
687 bool URLIndexPrivateData::IndexRow( 689 bool URLIndexPrivateData::IndexRow(
688 HistoryDatabase* history_db, 690 history::HistoryDatabase* history_db,
689 HistoryService* history_service, 691 HistoryService* history_service,
690 const URLRow& row, 692 const history::URLRow& row,
691 const std::string& languages, 693 const std::string& languages,
692 const std::set<std::string>& scheme_whitelist, 694 const std::set<std::string>& scheme_whitelist,
693 base::CancelableTaskTracker* tracker) { 695 base::CancelableTaskTracker* tracker) {
694 const GURL& gurl(row.url()); 696 const GURL& gurl(row.url());
695 697
696 // Index only URLs with a whitelisted scheme. 698 // Index only URLs with a whitelisted scheme.
697 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist)) 699 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))
698 return false; 700 return false;
699 701
700 URLID row_id = row.id(); 702 history::URLID row_id = row.id();
701 // Strip out username and password before saving and indexing. 703 // Strip out username and password before saving and indexing.
702 base::string16 url(net::FormatUrl(gurl, languages, 704 base::string16 url(net::FormatUrl(gurl, languages,
703 net::kFormatUrlOmitUsernamePassword, 705 net::kFormatUrlOmitUsernamePassword,
704 net::UnescapeRule::NONE, 706 net::UnescapeRule::NONE,
705 NULL, NULL, NULL)); 707 NULL, NULL, NULL));
706 708
707 HistoryID history_id = static_cast<HistoryID>(row_id); 709 HistoryID history_id = static_cast<HistoryID>(row_id);
708 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); 710 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());
709 711
710 // Add the row for quick lookup in the history info store. 712 // Add the row for quick lookup in the history info store.
711 URLRow new_row(GURL(url), row_id); 713 history::URLRow new_row(GURL(url), row_id);
712 new_row.set_visit_count(row.visit_count()); 714 new_row.set_visit_count(row.visit_count());
713 new_row.set_typed_count(row.typed_count()); 715 new_row.set_typed_count(row.typed_count());
714 new_row.set_last_visit(row.last_visit()); 716 new_row.set_last_visit(row.last_visit());
715 new_row.set_title(row.title()); 717 new_row.set_title(row.title());
716 history_info_map_[history_id].url_row = new_row; 718 history_info_map_[history_id].url_row = new_row;
717 719
718 // Index the words contained in the URL and title of the row. 720 // Index the words contained in the URL and title of the row.
719 RowWordStarts word_starts; 721 RowWordStarts word_starts;
720 AddRowWordsToIndex(new_row, &word_starts, languages); 722 AddRowWordsToIndex(new_row, &word_starts, languages);
721 word_starts_map_[history_id] = word_starts; 723 word_starts_map_[history_id] = word_starts;
722 724
723 // Update the recent visits information or schedule the update 725 // Update the recent visits information or schedule the update
724 // as appropriate. 726 // as appropriate.
725 if (history_db) { 727 if (history_db) {
726 // We'd like to check that we're on the history DB thread. 728 // We'd like to check that we're on the history DB thread.
727 // However, unittest code actually calls this on the UI thread. 729 // However, unittest code actually calls this on the UI thread.
728 // So we don't do any thread checks. 730 // So we don't do any thread checks.
729 VisitVector recent_visits; 731 history::VisitVector recent_visits;
730 // Make sure the private data is going to get as many recent visits as 732 // Make sure the private data is going to get as many recent visits as
731 // ScoredHistoryMatch::GetFrequency() hopes to use. 733 // ScoredHistoryMatch::GetFrequency() hopes to use.
732 DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore); 734 DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);
733 if (history_db->GetMostRecentVisitsForURL(row_id, 735 if (history_db->GetMostRecentVisitsForURL(row_id,
734 kMaxVisitsToStoreInCache, 736 kMaxVisitsToStoreInCache,
735 &recent_visits)) 737 &recent_visits))
736 UpdateRecentVisits(row_id, recent_visits); 738 UpdateRecentVisits(row_id, recent_visits);
737 } else { 739 } else {
738 DCHECK(tracker); 740 DCHECK(tracker);
739 DCHECK(history_service); 741 DCHECK(history_service);
740 ScheduleUpdateRecentVisits(history_service, row_id, tracker); 742 ScheduleUpdateRecentVisits(history_service, row_id, tracker);
741 } 743 }
742 744
743 return true; 745 return true;
744 } 746 }
745 747
746 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row, 748 void URLIndexPrivateData::AddRowWordsToIndex(const history::URLRow& row,
747 RowWordStarts* word_starts, 749 RowWordStarts* word_starts,
748 const std::string& languages) { 750 const std::string& languages) {
749 HistoryID history_id = static_cast<HistoryID>(row.id()); 751 HistoryID history_id = static_cast<HistoryID>(row.id());
750 // Split URL into individual, unique words then add in the title words. 752 // Split URL into individual, unique words then add in the title words.
751 const GURL& gurl(row.url()); 753 const GURL& gurl(row.url());
752 const base::string16& url = 754 const base::string16& url =
753 bookmarks::CleanUpUrlForMatching(gurl, languages, NULL); 755 bookmarks::CleanUpUrlForMatching(gurl, languages, NULL);
754 String16Set url_words = String16SetFromString16(url, 756 String16Set url_words = String16SetFromString16(url,
755 word_starts ? &word_starts->url_word_starts_ : NULL); 757 word_starts ? &word_starts->url_word_starts_ : NULL);
756 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title()); 758 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title());
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
825 if (iter != history_id_word_map_.end()) { 827 if (iter != history_id_word_map_.end()) {
826 WordIDSet& word_id_set(iter->second); 828 WordIDSet& word_id_set(iter->second);
827 word_id_set.insert(word_id); 829 word_id_set.insert(word_id);
828 } else { 830 } else {
829 WordIDSet word_id_set; 831 WordIDSet word_id_set;
830 word_id_set.insert(word_id); 832 word_id_set.insert(word_id);
831 history_id_word_map_[history_id] = word_id_set; 833 history_id_word_map_[history_id] = word_id_set;
832 } 834 }
833 } 835 }
834 836
835 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) { 837 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) {
836 RemoveRowWordsFromIndex(row); 838 RemoveRowWordsFromIndex(row);
837 HistoryID history_id = static_cast<HistoryID>(row.id()); 839 HistoryID history_id = static_cast<HistoryID>(row.id());
838 history_info_map_.erase(history_id); 840 history_info_map_.erase(history_id);
839 word_starts_map_.erase(history_id); 841 word_starts_map_.erase(history_id);
840 } 842 }
841 843
842 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) { 844 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) {
843 // Remove the entries in history_id_word_map_ and word_id_history_map_ for 845 // Remove the entries in history_id_word_map_ and word_id_history_map_ for
844 // this row. 846 // this row.
845 HistoryID history_id = static_cast<HistoryID>(row.id()); 847 HistoryID history_id = static_cast<HistoryID>(row.id());
846 WordIDSet word_id_set = history_id_word_map_[history_id]; 848 WordIDSet word_id_set = history_id_word_map_[history_id];
847 history_id_word_map_.erase(history_id); 849 history_id_word_map_.erase(history_id);
848 850
849 // Reconcile any changes to word usage. 851 // Reconcile any changes to word usage.
850 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); 852 for (WordIDSet::iterator word_id_iter = word_id_set.begin();
851 word_id_iter != word_id_set.end(); ++word_id_iter) { 853 word_id_iter != word_id_set.end(); ++word_id_iter) {
852 WordID word_id = *word_id_iter; 854 WordID word_id = *word_id_iter;
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
979 void URLIndexPrivateData::SaveHistoryInfoMap( 981 void URLIndexPrivateData::SaveHistoryInfoMap(
980 InMemoryURLIndexCacheItem* cache) const { 982 InMemoryURLIndexCacheItem* cache) const {
981 if (history_info_map_.empty()) 983 if (history_info_map_.empty())
982 return; 984 return;
983 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); 985 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
984 map_item->set_item_count(history_info_map_.size()); 986 map_item->set_item_count(history_info_map_.size());
985 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 987 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
986 iter != history_info_map_.end(); ++iter) { 988 iter != history_info_map_.end(); ++iter) {
987 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); 989 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
988 map_entry->set_history_id(iter->first); 990 map_entry->set_history_id(iter->first);
989 const URLRow& url_row(iter->second.url_row); 991 const history::URLRow& url_row(iter->second.url_row);
990 // Note: We only save information that contributes to the index so there 992 // Note: We only save information that contributes to the index so there
991 // is no need to save search_term_cache_ (not persistent). 993 // is no need to save search_term_cache_ (not persistent).
992 map_entry->set_visit_count(url_row.visit_count()); 994 map_entry->set_visit_count(url_row.visit_count());
993 map_entry->set_typed_count(url_row.typed_count()); 995 map_entry->set_typed_count(url_row.typed_count());
994 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); 996 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
995 map_entry->set_url(url_row.url().spec()); 997 map_entry->set_url(url_row.url().spec());
996 map_entry->set_title(base::UTF16ToUTF8(url_row.title())); 998 map_entry->set_title(base::UTF16ToUTF8(url_row.title()));
997 const VisitInfoVector& visits(iter->second.visits); 999 const VisitInfoVector& visits(iter->second.visits);
998 for (VisitInfoVector::const_iterator visit_iter = visits.begin(); 1000 for (VisitInfoVector::const_iterator visit_iter = visits.begin();
999 visit_iter != visits.end(); ++visit_iter) { 1001 visit_iter != visits.end(); ++visit_iter) {
(...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after
1160 uint32 expected_item_count = list_item.item_count(); 1162 uint32 expected_item_count = list_item.item_count();
1161 uint32 actual_item_count = list_item.history_info_map_entry_size(); 1163 uint32 actual_item_count = list_item.history_info_map_entry_size();
1162 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1164 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1163 return false; 1165 return false;
1164 const RepeatedPtrField<HistoryInfoMapEntry>& 1166 const RepeatedPtrField<HistoryInfoMapEntry>&
1165 entries(list_item.history_info_map_entry()); 1167 entries(list_item.history_info_map_entry());
1166 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter = 1168 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =
1167 entries.begin(); iter != entries.end(); ++iter) { 1169 entries.begin(); iter != entries.end(); ++iter) {
1168 HistoryID history_id = iter->history_id(); 1170 HistoryID history_id = iter->history_id();
1169 GURL url(iter->url()); 1171 GURL url(iter->url());
1170 URLRow url_row(url, history_id); 1172 history::URLRow url_row(url, history_id);
1171 url_row.set_visit_count(iter->visit_count()); 1173 url_row.set_visit_count(iter->visit_count());
1172 url_row.set_typed_count(iter->typed_count()); 1174 url_row.set_typed_count(iter->typed_count());
1173 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); 1175 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));
1174 if (iter->has_title()) { 1176 if (iter->has_title()) {
1175 base::string16 title(base::UTF8ToUTF16(iter->title())); 1177 base::string16 title(base::UTF8ToUTF16(iter->title()));
1176 url_row.set_title(title); 1178 url_row.set_title(title);
1177 } 1179 }
1178 history_info_map_[history_id].url_row = url_row; 1180 history_info_map_[history_id].url_row = url_row;
1179 1181
1180 // Restore visits list. 1182 // Restore visits list.
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
1219 jiter != title_starts.end(); ++jiter) 1221 jiter != title_starts.end(); ++jiter)
1220 word_starts.title_word_starts_.push_back(*jiter); 1222 word_starts.title_word_starts_.push_back(*jiter);
1221 word_starts_map_[history_id] = word_starts; 1223 word_starts_map_[history_id] = word_starts;
1222 } 1224 }
1223 } else { 1225 } else {
1224 // Since the cache did not contain any word starts we must rebuild them from 1226 // Since the cache did not contain any word starts we must rebuild them from
1225 // the URL and page titles. 1227 // the URL and page titles.
1226 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 1228 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
1227 iter != history_info_map_.end(); ++iter) { 1229 iter != history_info_map_.end(); ++iter) {
1228 RowWordStarts word_starts; 1230 RowWordStarts word_starts;
1229 const URLRow& row(iter->second.url_row); 1231 const history::URLRow& row(iter->second.url_row);
1230 const base::string16& url = 1232 const base::string16& url =
1231 bookmarks::CleanUpUrlForMatching(row.url(), languages, NULL); 1233 bookmarks::CleanUpUrlForMatching(row.url(), languages, NULL);
1232 String16VectorFromString16(url, false, &word_starts.url_word_starts_); 1234 String16VectorFromString16(url, false, &word_starts.url_word_starts_);
1233 const base::string16& title = 1235 const base::string16& title =
1234 bookmarks::CleanUpTitleForMatching(row.title()); 1236 bookmarks::CleanUpTitleForMatching(row.title());
1235 String16VectorFromString16(title, false, &word_starts.title_word_starts_); 1237 String16VectorFromString16(title, false, &word_starts.title_word_starts_);
1236 word_starts_map_[iter->first] = word_starts; 1238 word_starts_map_[iter->first] = word_starts;
1237 } 1239 }
1238 } 1240 }
1239 return true; 1241 return true;
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
1295 } 1297 }
1296 } 1298 }
1297 1299
1298 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {} 1300 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}
1299 1301
1300 void URLIndexPrivateData::AddHistoryMatch::operator()( 1302 void URLIndexPrivateData::AddHistoryMatch::operator()(
1301 const HistoryID history_id) { 1303 const HistoryID history_id) {
1302 HistoryInfoMap::const_iterator hist_pos = 1304 HistoryInfoMap::const_iterator hist_pos =
1303 private_data_.history_info_map_.find(history_id); 1305 private_data_.history_info_map_.find(history_id);
1304 if (hist_pos != private_data_.history_info_map_.end()) { 1306 if (hist_pos != private_data_.history_info_map_.end()) {
1305 const URLRow& hist_item = hist_pos->second.url_row; 1307 const history::URLRow& hist_item = hist_pos->second.url_row;
1306 const VisitInfoVector& visits = hist_pos->second.visits; 1308 const VisitInfoVector& visits = hist_pos->second.visits;
1307 WordStartsMap::const_iterator starts_pos = 1309 WordStartsMap::const_iterator starts_pos =
1308 private_data_.word_starts_map_.find(history_id); 1310 private_data_.word_starts_map_.find(history_id);
1309 DCHECK(starts_pos != private_data_.word_starts_map_.end()); 1311 DCHECK(starts_pos != private_data_.word_starts_map_.end());
1310 ScoredHistoryMatch match = builder_.Build( 1312 ScoredHistoryMatch match = builder_.Build(
1311 hist_item, visits, languages_, lower_string_, lower_terms_, 1313 hist_item, visits, languages_, lower_string_, lower_terms_,
1312 lower_terms_to_word_starts_offsets_, starts_pos->second, now_); 1314 lower_terms_to_word_starts_offsets_, starts_pos->second, now_);
1313 if (match.raw_score > 0) 1315 if (match.raw_score > 0)
1314 scored_matches_.push_back(match); 1316 scored_matches_.push_back(match);
1315 } 1317 }
(...skipping 11 matching lines...) Expand all
1327 1329
1328 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()( 1330 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(
1329 const HistoryID h1, 1331 const HistoryID h1,
1330 const HistoryID h2) { 1332 const HistoryID h2) {
1331 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1)); 1333 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));
1332 if (entry1 == history_info_map_.end()) 1334 if (entry1 == history_info_map_.end())
1333 return false; 1335 return false;
1334 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2)); 1336 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));
1335 if (entry2 == history_info_map_.end()) 1337 if (entry2 == history_info_map_.end())
1336 return true; 1338 return true;
1337 const URLRow& r1(entry1->second.url_row); 1339 const history::URLRow& r1(entry1->second.url_row);
1338 const URLRow& r2(entry2->second.url_row); 1340 const history::URLRow& r2(entry2->second.url_row);
1339 // First cut: typed count, visit count, recency. 1341 // First cut: typed count, visit count, recency.
1340 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks 1342 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
1341 // recently visited (within the last 12/24 hours) as highly important. Get 1343 // recently visited (within the last 12/24 hours) as highly important. Get
1342 // input from mpearson. 1344 // input from mpearson.
1343 if (r1.typed_count() != r2.typed_count()) 1345 if (r1.typed_count() != r2.typed_count())
1344 return (r1.typed_count() > r2.typed_count()); 1346 return (r1.typed_count() > r2.typed_count());
1345 if (r1.visit_count() != r2.visit_count()) 1347 if (r1.visit_count() != r2.visit_count())
1346 return (r1.visit_count() > r2.visit_count()); 1348 return (r1.visit_count() > r2.visit_count());
1347 return (r1.last_visit() > r2.last_visit()); 1349 return (r1.last_visit() > r2.last_visit());
1348 } 1350 }
1349
1350 } // namespace history
OLDNEW
« no previous file with comments | « chrome/browser/autocomplete/url_index_private_data.h ('k') | chrome/browser/history/history_service.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698