Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(94)

Side by Side Diff: chrome/browser/history/url_index_private_data.h

Issue 9030031: Move InMemoryURLIndex Caching Operations to FILE Thread (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Syncing with hopes of pleasing trybot update Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ 5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_
6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ 6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_
7 #pragma once 7 #pragma once
8 8
9 #include "base/file_path.h" 9 #include "base/file_path.h"
10 #include "base/gtest_prod_util.h" 10 #include "base/gtest_prod_util.h"
11 #include "base/memory/ref_counted.h"
11 #include "chrome/browser/history/in_memory_url_index_types.h" 12 #include "chrome/browser/history/in_memory_url_index_types.h"
12 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" 13 #include "chrome/browser/history/in_memory_url_index_cache.pb.h"
14 #include "content/public/browser/notification_details.h"
13 15
14 class HistoryQuickProviderTest; 16 class HistoryQuickProviderTest;
15 17
16 namespace in_memory_url_index { 18 namespace in_memory_url_index {
17 class InMemoryURLIndexCacheItem; 19 class InMemoryURLIndexCacheItem;
18 } 20 }
19 21
20 namespace history { 22 namespace history {
21 23
22 namespace imui = in_memory_url_index; 24 namespace imui = in_memory_url_index;
23 25
24 class HistoryDatabase; 26 class HistoryDatabase;
27 class InMemoryURLIndex;
28 class RefCountedBool;
25 29
26 // Current version of the cache file. 30 // Current version of the cache file.
27 static const int kCurrentCacheFileVersion = 1; 31 static const int kCurrentCacheFileVersion = 1;
28 32
29 // A structure describing the InMemoryURLIndex's internal data and providing for 33 // A structure describing the InMemoryURLIndex's internal data and providing for
30 // restoring, rebuilding and updating that internal data. 34 // restoring, rebuilding and updating that internal data.
31 class URLIndexPrivateData { 35 class URLIndexPrivateData
36 : public base::RefCountedThreadSafe<URLIndexPrivateData> {
32 public: 37 public:
33 URLIndexPrivateData(); 38 URLIndexPrivateData();
39
40 private:
41 friend class base::RefCountedThreadSafe<URLIndexPrivateData>;
34 ~URLIndexPrivateData(); 42 ~URLIndexPrivateData();
35 43
36 private:
37 friend class AddHistoryMatch; 44 friend class AddHistoryMatch;
38 friend class ::HistoryQuickProviderTest; 45 friend class ::HistoryQuickProviderTest;
39 friend class InMemoryURLIndex; 46 friend class InMemoryURLIndex;
40 friend class InMemoryURLIndexTest; 47 friend class InMemoryURLIndexTest;
41 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); 48 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore);
42 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); 49 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet);
43 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); 50 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring);
44 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); 51 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch);
45 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); 52 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching);
46 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); 53 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs);
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
122 // or page title. Scores are then calculated on no more than 129 // or page title. Scores are then calculated on no more than
123 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of 130 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of
124 // candidates may cause perceptible typing response delays in the omnibox. 131 // candidates may cause perceptible typing response delays in the omnibox.
125 // This is likely to occur for short omnibox terms such as 'h' and 'w' which 132 // This is likely to occur for short omnibox terms such as 'h' and 'w' which
126 // will be found in nearly all history candidates. Results are sorted by 133 // will be found in nearly all history candidates. Results are sorted by
127 // descending score. The full results set (i.e. beyond the 134 // descending score. The full results set (i.e. beyond the
128 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls 135 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls
129 // to this function. 136 // to this function.
130 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); 137 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string);
131 138
132 // Sets the |languages| to a list of language encodings with which the history 139 // Creates a new URLIndexPrivateData object, populates it from the contents
133 // URLs and omnibox searches are interpreted, i.e. how each is broken 140 // of the cache file stored in |file_path|, and assigns it to |private_data|.
134 // down into words and each word is broken down into characters. 141 // |languages| will be used to break URLs and page titles into words and is
135 void set_languages(const std::string& languages) { languages_ = languages; } 142 // deliberately passed by value.
143 static void RestoreFromFileTask(
144 const FilePath& file_path,
145 scoped_refptr<URLIndexPrivateData> private_data,
146 std::string languages);
136 147
137 // Restores the index's private data from the cache file stored in the 148 // Constructs a new object by restoring its contents from the file at |path|.
138 // profile directory and returns true if successful. 149 // Returns the new URLIndexPrivateData which on success will contain the
139 bool RestoreFromFile(const FilePath& file_path); 150 // restored data but upon failure will be empty. |languages| will be used to
151 // break URLs and page titles into words.
152 static scoped_refptr<URLIndexPrivateData> RestoreFromFile(
153 const FilePath& path,
154 const std::string& languages);
140 155
141 // Constructs a new object by rebuilding its contents from the history 156 // Constructs a new object by rebuilding its contents from the history
142 // database in |history_db|. Returns the new URLIndexPrivateData which on 157 // database in |history_db|. Returns the new URLIndexPrivateData which on
143 // success will contain the rebuilt data but upon failure will be empty. 158 // success will contain the rebuilt data but upon failure will be empty.
144 static URLIndexPrivateData* RebuildFromHistory(HistoryDatabase* history_db); 159 // |languages| gives a list of language encodings by which the URLs and page
160 // titles are broken down into words and characters.
161 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory(
162 HistoryDatabase* history_db,
163 const std::string& languages,
164 const std::set<std::string>& scheme_whitelist);
165
166 // Writes |private_data| as a cache file to |file_path| and returns success
167 // via |succeeded|.
168 static void WritePrivateDataToCacheFileTask(
169 scoped_refptr<URLIndexPrivateData> private_data,
170 const FilePath& file_path,
171 scoped_refptr<RefCountedBool> succeeded);
145 172
146 // Caches the index private data and writes the cache file to the profile 173 // Caches the index private data and writes the cache file to the profile
147 // directory. 174 // directory. Called by WritePrivateDataToCacheFileTask.
148 bool SaveToFile(const FilePath& file_path); 175 bool SaveToFile(const FilePath& file_path);
149 176
150 // Initializes all index data members in preparation for restoring the index 177 // Initializes all index data members in preparation for restoring the index
151 // from the cache or a complete rebuild from the history database. 178 // from the cache or a complete rebuild from the history database.
152 void Clear(); 179 void Clear();
153 180
181 // Returns true if there is no data in the index.
182 bool Empty() const;
183
184 // Creates a copy of ourself.
185 scoped_refptr<URLIndexPrivateData> Duplicate() const;
186
154 // Adds |word_id| to |history_id|'s entry in the history/word map, 187 // Adds |word_id| to |history_id|'s entry in the history/word map,
155 // creating a new entry if one does not already exist. 188 // creating a new entry if one does not already exist.
156 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); 189 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id);
157 190
158 // Given a set of Char16s, finds words containing those characters. 191 // Given a set of Char16s, finds words containing those characters.
159 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); 192 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars);
160 193
161 // Initializes the whitelist of URL schemes.
162 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist);
163
164 // URL History indexing support functions. 194 // URL History indexing support functions.
165 195
166 // Indexes one URL history item as described by |row|. Returns true if the 196 // Indexes one URL history item as described by |row|. Returns true if the
167 // row was actually indexed. 197 // row was actually indexed. |languages| gives a list of language encodings by
168 bool IndexRow(const URLRow& row); 198 // which the URLs and page titles are broken down into words and characters.
199 // |scheme_whitelist| is used to filter non-qualifying schemes.
200 bool IndexRow(const URLRow& row,
201 const std::string& languages,
202 const std::set<std::string>& scheme_whitelist);
169 203
170 // Adds the history item in |row| to the index if it does not already 204 // Adds the history item in |row| to the index if it does not already
171 // exist and it meets the minimum 'quick' criteria. If the row already exists 205 // exist and it meets the minimum 'quick' criteria. If the row already exists
172 // in the index then the index will be updated if the row still meets the 206 // in the index then the index will be updated if the row still meets the
173 // criteria, otherwise the row will be removed from the index. Returns true 207 // criteria, otherwise the row will be removed from the index. Returns true
174 // if the index was actually updated. 208 // if the index was actually updated. |languages| gives a list of language
175 bool UpdateURL(const URLRow& row); 209 // encodings by which the URLs and page titles are broken down into words and
210 // characters. |scheme_whitelist| is used to filter non-qualifying schemes.
211 bool UpdateURL(const URLRow& row,
212 const std::string& languages,
213 const std::set<std::string>& scheme_whitelist);
176 214
177 // Deletes indexing data for the history item with the URL given in |url|. 215 // Deletes indexing data for the history item with the URL given in |url|.
178 // The item may not have actually been indexed, which is the case if it did 216 // The item may not have actually been indexed, which is the case if it did
179 // not previously meet minimum 'quick' criteria. Returns true if the index 217 // not previously meet minimum 'quick' criteria. Returns true if the index
180 // was actually updated. 218 // was actually updated.
181 bool DeleteURL(const GURL& url); 219 bool DeleteURL(const GURL& url);
182 220
183 // Parses and indexes the words in the URL and page title of |row| and 221 // Parses and indexes the words in the URL and page title of |row| and
184 // calculates the word starts in each, saving the starts in |word_starts|. 222 // calculates the word starts in each, saving the starts in |word_starts|.
185 void AddRowWordsToIndex(const URLRow& row, RowWordStarts* word_starts); 223 // |languages| gives a list of language encodings by which the URLs and page
224 // titles are broken down into words and characters.
225 void AddRowWordsToIndex(const URLRow& row,
226 RowWordStarts* word_starts,
227 const std::string& languages);
186 228
187 // Removes |row| and all associated words and characters from the index. 229 // Removes |row| and all associated words and characters from the index.
188 void RemoveRowFromIndex(const URLRow& row); 230 void RemoveRowFromIndex(const URLRow& row);
189 231
190 // Removes all words and characters associated with |row| from the index. 232 // Removes all words and characters associated with |row| from the index.
191 void RemoveRowWordsFromIndex(const URLRow& row); 233 void RemoveRowWordsFromIndex(const URLRow& row);
192 234
193 // Given a single word in |uni_word|, adds a reference for the containing 235 // Given a single word in |uni_word|, adds a reference for the containing
194 // history item identified by |history_id| to the index. 236 // history item identified by |history_id| to the index.
195 void AddWordToIndex(const string16& uni_word, HistoryID history_id); 237 void AddWordToIndex(const string16& uni_word, HistoryID history_id);
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
228 const string16& lower_string, 270 const string16& lower_string,
229 const String16Vector& terms_vector, 271 const String16Vector& terms_vector,
230 const RowWordStarts& word_starts); 272 const RowWordStarts& word_starts);
231 273
232 // Calculates a component score based on position, ordering and total 274 // Calculates a component score based on position, ordering and total
233 // substring match size using metrics recorded in |matches|. |max_length| 275 // substring match size using metrics recorded in |matches|. |max_length|
234 // is the length of the string against which the terms are being searched. 276 // is the length of the string against which the terms are being searched.
235 static int ScoreComponentForMatches(const TermMatches& matches, 277 static int ScoreComponentForMatches(const TermMatches& matches,
236 size_t max_length); 278 size_t max_length);
237 279
238 // Determines if |gurl| has a whitelisted scheme and returns true if so.
239 bool URLSchemeIsWhitelisted(const GURL& gurl) const;
240
241 // Sets the version of the cache file that will be saved when calling
242 // SavePrivateData(). For unit testing only.
243 void set_saved_cache_version(int version) { saved_cache_version_ = version; }
244
245 // Encode a data structure into the protobuf |cache|. 280 // Encode a data structure into the protobuf |cache|.
246 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; 281 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const;
247 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; 282 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const;
248 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; 283 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const;
249 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; 284 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const;
250 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; 285 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const;
251 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; 286 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const;
252 void SaveWordStartsMap(imui::InMemoryURLIndexCacheItem* cache) const; 287 void SaveWordStartsMap(imui::InMemoryURLIndexCacheItem* cache) const;
253 288
254 // Decode a data structure from the protobuf |cache|. Return false if there 289 // Decode a data structure from the protobuf |cache|. Return false if there
255 // is any kind of failure. 290 // is any kind of failure. |languages| will be used to break URLs and page
256 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); 291 // titles into words.
292 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache,
293 const std::string& languages);
257 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); 294 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache);
258 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); 295 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache);
259 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); 296 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache);
260 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); 297 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache);
261 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); 298 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache);
262 bool RestoreWordStartsMap(const imui::InMemoryURLIndexCacheItem& cache); 299 bool RestoreWordStartsMap(const imui::InMemoryURLIndexCacheItem& cache,
300 const std::string& languages);
301
302 // Determines if |gurl| has a whitelisted scheme and returns true if so.
303 static bool URLSchemeIsWhitelisted(const GURL& gurl,
304 const std::set<std::string>& whitelist);
263 305
264 // Cache of search terms. 306 // Cache of search terms.
265 SearchTermCacheMap search_term_cache_; 307 SearchTermCacheMap search_term_cache_;
266 308
267 // Languages used during the word-breaking process during indexing.
268 std::string languages_;
269
270 // Only URLs with a whitelisted scheme are indexed.
271 std::set<std::string> scheme_whitelist_;
272
273 // Start of data members that are cached ------------------------------------- 309 // Start of data members that are cached -------------------------------------
274 310
275 // The version of the cache file most recently used to restore this instance 311 // The version of the cache file most recently used to restore this instance
276 // of the private data. If the private data was rebuilt from the history 312 // of the private data. If the private data was rebuilt from the history
277 // database this will be 0. 313 // database this will be 0.
278 int restored_cache_version_; 314 int restored_cache_version_;
279 315
280 // A list of all indexed words. The index of a word in this list is the 316 // A list of all indexed words. The index of a word in this list is the
281 // ID of the word in the word_map_. It reduces the memory overhead by 317 // ID of the word in the word_map_. It reduces the memory overhead by
282 // replacing a potentially long and repeated string with a simple index. 318 // replacing a potentially long and repeated string with a simple index.
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
327 // Used for unit testing only. Records the number of candidate history items 363 // Used for unit testing only. Records the number of candidate history items
328 // at three stages in the index searching process. 364 // at three stages in the index searching process.
329 size_t pre_filter_item_count_; // After word index is queried. 365 size_t pre_filter_item_count_; // After word index is queried.
330 size_t post_filter_item_count_; // After trimming large result set. 366 size_t post_filter_item_count_; // After trimming large result set.
331 size_t post_scoring_item_count_; // After performing final filter/scoring. 367 size_t post_scoring_item_count_; // After performing final filter/scoring.
332 }; 368 };
333 369
334 } // namespace history 370 } // namespace history
335 371
336 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ 372 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_
OLDNEW
« no previous file with comments | « chrome/browser/history/in_memory_url_index_unittest.cc ('k') | chrome/browser/history/url_index_private_data.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698