Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(389)

Side by Side Diff: chrome/browser/history/url_index_private_data.h

Issue 9030031: Move InMemoryURLIndex Caching Operations to FILE Thread (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Added missing changed file to CL. Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ 5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_
6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ 6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_
7 #pragma once 7 #pragma once
8 8
9 #include "base/file_path.h" 9 #include "base/file_path.h"
10 #include "base/gtest_prod_util.h" 10 #include "base/gtest_prod_util.h"
11 #include "base/memory/ref_counted.h"
11 #include "chrome/browser/history/in_memory_url_index_types.h" 12 #include "chrome/browser/history/in_memory_url_index_types.h"
12 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" 13 #include "chrome/browser/history/in_memory_url_index_cache.pb.h"
14 #include "content/public/browser/notification_details.h"
13 15
14 class HistoryQuickProviderTest; 16 class HistoryQuickProviderTest;
15 17
16 namespace in_memory_url_index { 18 namespace in_memory_url_index {
17 class InMemoryURLIndexCacheItem; 19 class InMemoryURLIndexCacheItem;
18 } 20 }
19 21
20 namespace history { 22 namespace history {
21 23
22 namespace imui = in_memory_url_index; 24 namespace imui = in_memory_url_index;
23 25
24 class HistoryDatabase; 26 class HistoryDatabase;
27 class InMemoryURLIndex;
28 class RefCountedBool;
25 29
26 // A structure describing the InMemoryURLIndex's internal data and providing for 30 // A structure describing the InMemoryURLIndex's internal data and providing for
27 // restoring, rebuilding and updating that internal data. 31 // restoring, rebuilding and updating that internal data.
28 class URLIndexPrivateData { 32 class URLIndexPrivateData
33 : public base::RefCountedThreadSafe<URLIndexPrivateData> {
29 public: 34 public:
30 URLIndexPrivateData(); 35 URLIndexPrivateData();
36
37 private:
38 friend class base::RefCountedThreadSafe<URLIndexPrivateData>;
31 ~URLIndexPrivateData(); 39 ~URLIndexPrivateData();
32 40
33 private:
34 friend class AddHistoryMatch; 41 friend class AddHistoryMatch;
35 friend class ::HistoryQuickProviderTest; 42 friend class ::HistoryQuickProviderTest;
36 friend class InMemoryURLIndex; 43 friend class InMemoryURLIndex;
37 friend class InMemoryURLIndexTest; 44 friend class InMemoryURLIndexTest;
38 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); 45 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore);
39 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); 46 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet);
40 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); 47 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring);
41 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); 48 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch);
42 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); 49 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching);
43 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); 50 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs);
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
119 // or page title. Scores are then calculated on no more than 126 // or page title. Scores are then calculated on no more than
120 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of 127 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of
121 // candidates may cause perceptible typing response delays in the omnibox. 128 // candidates may cause perceptible typing response delays in the omnibox.
122 // This is likely to occur for short omnibox terms such as 'h' and 'w' which 129 // This is likely to occur for short omnibox terms such as 'h' and 'w' which
123 // will be found in nearly all history candidates. Results are sorted by 130 // will be found in nearly all history candidates. Results are sorted by
124 // descending score. The full results set (i.e. beyond the 131 // descending score. The full results set (i.e. beyond the
125 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls 132 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls
126 // to this function. 133 // to this function.
127 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); 134 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string);
128 135
129 // Sets the |languages| to a list of language encodings with which the history 136 // Creates a new URLIndexPrivateData object, populates it from the contents
130 // URLs and omnibox searches are interpreted, i.e. how each is broken 137 // of the cache file stored in |file_path|, and assigns it to
131 // down into words and each word is broken down into characters. 138 // |private_data_ptr|.
132 void set_languages(const std::string& languages) { languages_ = languages; } 139 static void RestoreFromFileTask(
140 const FilePath& file_path,
141 scoped_refptr<URLIndexPrivateData> private_data_ptr);
133 142
134 // Restores the index's private data from the cache file stored in the 143 // Constructs a new object by restoring its contents from the file at |path|.
135 // profile directory and returns true if successful. 144 // Returns the new URLIndexPrivateData which on success will contain the
136 bool RestoreFromFile(const FilePath& file_path); 145 // restored data but upon failure will be empty.
146 static scoped_refptr<URLIndexPrivateData> RestoreFromFile(
147 const FilePath& path);
137 148
138 // Constructs a new object by rebuilding its contents from the history 149 // Constructs a new object by rebuilding its contents from the history
139 // database in |history_db|. Returns the new URLIndexPrivateData which on 150 // database in |history_db|. Returns the new URLIndexPrivateData which on
140 // success will contain the rebuilt data but upon failure will be empty. 151 // success will contain the rebuilt data but upon failure will be empty.
141 static URLIndexPrivateData* RebuildFromHistory(HistoryDatabase* history_db); 152 // |languages| gives a list of language encodings by which the URLs and page
153 // titles are broken down into words and characters.
154 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory(
155 HistoryDatabase* history_db,
156 const std::string& languages,
157 const std::set<std::string>& scheme_whitelist);
158
159 // Writes |private_data| as a cache file to |file_path| and returns success
160 // via |succeeded|.
161 static void WritePrivateDataToCacheFileTask(
162 scoped_refptr<URLIndexPrivateData> private_data,
163 const FilePath& file_path,
164 scoped_refptr<RefCountedBool> succeeded);
142 165
143 // Caches the index private data and writes the cache file to the profile 166 // Caches the index private data and writes the cache file to the profile
144 // directory. 167 // directory. Called by WritePrivateDataToCacheFileTask.
145 bool SaveToFile(const FilePath& file_path); 168 bool SaveToFile(const FilePath& file_path);
146 169
147 // Initializes all index data members in preparation for restoring the index 170 // Initializes all index data members in preparation for restoring the index
148 // from the cache or a complete rebuild from the history database. 171 // from the cache or a complete rebuild from the history database.
149 void Clear(); 172 void Clear();
150 173
174 // Returns true if there is no data in the index.
175 bool Empty() const;
176
177 // Creates a copy of this object.
178 URLIndexPrivateData* Duplicate() const;
179
151 // Adds |word_id| to |history_id|'s entry in the history/word map, 180 // Adds |word_id| to |history_id|'s entry in the history/word map,
152 // creating a new entry if one does not already exist. 181 // creating a new entry if one does not already exist.
153 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); 182 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id);
154 183
155 // Given a set of Char16s, finds words containing those characters. 184 // Given a set of Char16s, finds words containing those characters.
156 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); 185 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars);
157 186
158 // Initializes the whitelist of URL schemes.
159 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist);
160
161 // URL History indexing support functions. 187 // URL History indexing support functions.
162 188
163 // Indexes one URL history item as described by |row|. Returns true if the 189 // Indexes one URL history item as described by |row|. Returns true if the
164 // row was actually indexed. 190 // row was actually indexed. |languages| gives a list of language encodings by
165 bool IndexRow(const URLRow& row); 191 // which the URLs and page titles are broken down into words and characters.
192 // |scheme_whitelist| is used to filter non-qualifying schemes.
193 bool IndexRow(const URLRow& row,
194 const std::string& languages,
195 const std::set<std::string>& scheme_whitelist);
166 196
167 // Adds the history item in |row| to the index if it does not already 197 // Adds the history item in |row| to the index if it does not already
168 // exist and it meets the minimum 'quick' criteria. If the row already exists 198 // exist and it meets the minimum 'quick' criteria. If the row already exists
169 // in the index then the index will be updated if the row still meets the 199 // in the index then the index will be updated if the row still meets the
170 // criteria, otherwise the row will be removed from the index. Returns true 200 // criteria, otherwise the row will be removed from the index. Returns true
171 // if the index was actually updated. 201 // if the index was actually updated. |languages| gives a list of language
172 bool UpdateURL(const URLRow& row); 202 // encodings by which the URLs and page titles are broken down into words and
203 // characters. |scheme_whitelist| is used to filter non-qualifying schemes.
204 bool UpdateURL(const URLRow& row,
205 const std::string& languages,
206 const std::set<std::string>& scheme_whitelist);
173 207
174 // Deletes indexing data for the history item with the URL given in |url|. 208 // Deletes indexing data for the history item with the URL given in |url|.
175 // The item may not have actually been indexed, which is the case if it did 209 // The item may not have actually been indexed, which is the case if it did
176 // not previously meet minimum 'quick' criteria. Returns true if the index 210 // not previously meet minimum 'quick' criteria. Returns true if the index
177 // was actually updated. 211 // was actually updated.
178 bool DeleteURL(const GURL& url); 212 bool DeleteURL(const GURL& url);
179 213
180 // Parses and indexes the words in the URL and page title of |row|. 214 // Parses and indexes the words in the URL and page title of |row|.
181 void AddRowWordsToIndex(const URLRow& row); 215 // |languages| gives a list of language encodings by which the URLs and page
216 // titles are broken down into words and characters.
217 void AddRowWordsToIndex(const URLRow& row,
218 const std::string& languages);
182 219
183 // Removes |row| and all associated words and characters from the index. 220 // Removes |row| and all associated words and characters from the index.
184 void RemoveRowFromIndex(const URLRow& row); 221 void RemoveRowFromIndex(const URLRow& row);
185 222
186 // Removes all words and characters associated with |row| from the index. 223 // Removes all words and characters associated with |row| from the index.
187 void RemoveRowWordsFromIndex(const URLRow& row); 224 void RemoveRowWordsFromIndex(const URLRow& row);
188 225
189 // Given a single word in |uni_word|, adds a reference for the containing 226 // Given a single word in |uni_word|, adds a reference for the containing
190 // history item identified by |history_id| to the index. 227 // history item identified by |history_id| to the index.
191 void AddWordToIndex(const string16& uni_word, HistoryID history_id); 228 void AddWordToIndex(const string16& uni_word, HistoryID history_id);
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
223 const URLRow& row, 260 const URLRow& row,
224 const string16& lower_string, 261 const string16& lower_string,
225 const String16Vector& terms_vector); 262 const String16Vector& terms_vector);
226 263
227 // Calculates a component score based on position, ordering and total 264 // Calculates a component score based on position, ordering and total
228 // substring match size using metrics recorded in |matches|. |max_length| 265 // substring match size using metrics recorded in |matches|. |max_length|
229 // is the length of the string against which the terms are being searched. 266 // is the length of the string against which the terms are being searched.
230 static int ScoreComponentForMatches(const TermMatches& matches, 267 static int ScoreComponentForMatches(const TermMatches& matches,
231 size_t max_length); 268 size_t max_length);
232 269
233 // Determines if |gurl| has a whitelisted scheme and returns true if so.
234 bool URLSchemeIsWhitelisted(const GURL& gurl) const;
235
236 // Encode a data structure into the protobuf |cache|. 270 // Encode a data structure into the protobuf |cache|.
237 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; 271 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const;
238 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; 272 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const;
239 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; 273 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const;
240 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; 274 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const;
241 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; 275 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const;
242 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; 276 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const;
243 277
244 // Decode a data structure from the protobuf |cache|. Return false if there 278 // Decode a data structure from the protobuf |cache|. Return false if there
245 // is any kind of failure. 279 // is any kind of failure.
246 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); 280 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache);
247 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); 281 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache);
248 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); 282 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache);
249 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); 283 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache);
250 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); 284 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache);
251 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); 285 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache);
252 286
287 // Determines if |gurl| has a whitelisted scheme and returns true if so.
288 static bool URLSchemeIsWhitelisted(const GURL& gurl,
289 const std::set<std::string>& whitelist);
290
253 // Cache of search terms. 291 // Cache of search terms.
254 SearchTermCacheMap search_term_cache_; 292 SearchTermCacheMap search_term_cache_;
255 293
256 // Languages used during the word-breaking process during indexing.
257 std::string languages_;
258
259 // Only URLs with a whitelisted scheme are indexed.
260 std::set<std::string> scheme_whitelist_;
261
262 // Start of data members that are cached ------------------------------------- 294 // Start of data members that are cached -------------------------------------
263 295
264 // A list of all indexed words. The index of a word in this list is the 296 // A list of all indexed words. The index of a word in this list is the
265 // ID of the word in the word_map_. It reduces the memory overhead by 297 // ID of the word in the word_map_. It reduces the memory overhead by
266 // replacing a potentially long and repeated string with a simple index. 298 // replacing a potentially long and repeated string with a simple index.
267 String16Vector word_list_; 299 String16Vector word_list_;
268 300
269 // A list of available word slots in |word_list_|. An available word slot 301 // A list of available word slots in |word_list_|. An available word slot
270 // is the index of an unused word in the word_list_ vector, also referred to as 302 // is the index of an unused word in the word_list_ vector, also referred to as
271 // a WordID. As URL visits are added or modified new words may be added to 303 // a WordID. As URL visits are added or modified new words may be added to
(...skipping 30 matching lines...) Expand all
302 // Used for unit testing only. Records the number of candidate history items 334 // Used for unit testing only. Records the number of candidate history items
303 // at three stages in the index searching process. 335 // at three stages in the index searching process.
304 size_t pre_filter_item_count_; // After word index is queried. 336 size_t pre_filter_item_count_; // After word index is queried.
305 size_t post_filter_item_count_; // After trimming large result set. 337 size_t post_filter_item_count_; // After trimming large result set.
306 size_t post_scoring_item_count_; // After performing final filter/scoring. 338 size_t post_scoring_item_count_; // After performing final filter/scoring.
307 }; 339 };
308 340
309 } // namespace history 341 } // namespace history
310 342
311 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ 343 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698