Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(11)

Side by Side Diff: chrome/browser/history/in_memory_url_index.h

Issue 8451009: HQP Refactoring (in Preparation for SQLite Cache) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: '' Created 9 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
7 #pragma once 7 #pragma once
8 8
9 #include <functional> 9 #include <functional>
10 #include <map> 10 #include <map>
11 #include <set> 11 #include <set>
12 #include <string> 12 #include <string>
13 #include <vector> 13 #include <vector>
14 14
15 #include "base/basictypes.h" 15 #include "base/basictypes.h"
16 #include "base/file_path.h" 16 #include "base/file_path.h"
17 #include "base/gtest_prod_util.h" 17 #include "base/gtest_prod_util.h"
18 #include "base/memory/linked_ptr.h" 18 #include "base/memory/linked_ptr.h"
19 #include "base/memory/scoped_ptr.h" 19 #include "base/memory/scoped_ptr.h"
20 #include "base/string16.h" 20 #include "base/string16.h"
21 #include "chrome/browser/autocomplete/autocomplete_match.h" 21 #include "chrome/browser/autocomplete/autocomplete_match.h"
22 #include "chrome/browser/autocomplete/history_provider_util.h" 22 #include "chrome/browser/autocomplete/history_provider_util.h"
23 #include "chrome/browser/history/history_types.h" 23 #include "chrome/browser/history/history_types.h"
24 #include "chrome/browser/history/in_memory_url_index_types.h" 24 #include "chrome/browser/history/in_memory_url_index_types.h"
25 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" 25 #include "chrome/browser/history/in_memory_url_index_cache.pb.h"
26 #include "content/public/browser/notification_observer.h"
27 #include "content/public/browser/notification_registrar.h"
26 #include "sql/connection.h" 28 #include "sql/connection.h"
27 #include "testing/gtest/include/gtest/gtest_prod.h" 29 #include "testing/gtest/include/gtest/gtest_prod.h"
28 30
29 class Profile; 31 class Profile;
30 32
31 namespace base {
32 class Time;
33 }
34
35 namespace in_memory_url_index { 33 namespace in_memory_url_index {
36 class InMemoryURLIndexCacheItem; 34 class InMemoryURLIndexCacheItem;
37 } 35 }
38 36
39 namespace history { 37 namespace history {
40 38
41 namespace imui = in_memory_url_index; 39 namespace imui = in_memory_url_index;
42 40
43 class URLDatabase; 41 class URLDatabase;
42 struct URLsDeletedDetails;
43 struct URLsModifiedDetails;
44 struct URLVisitedDetails;
44 45
45 // The URL history source. 46 // The URL history source.
46 // Holds portions of the URL database in memory in an indexed form. Used to 47 // Holds portions of the URL database in memory in an indexed form. Used to
47 // quickly look up matching URLs for a given query string. Used by 48 // quickly look up matching URLs for a given query string. Used by
48 // the HistoryURLProvider for inline autocomplete and to provide URL 49 // the HistoryURLProvider for inline autocomplete and to provide URL
49 // matches to the omnibox. 50 // matches to the omnibox.
50 // 51 //
51 // Note about multi-byte codepoints and the data structures in the 52 // Note about multi-byte codepoints and the data structures in the
52 // InMemoryURLIndex class: One will quickly notice that no effort is made to 53 // InMemoryURLIndex class: One will quickly notice that no effort is made to
53 // ensure that multi-byte character boundaries are detected when indexing the 54 // ensure that multi-byte character boundaries are detected when indexing the
54 // words and characters in the URL history database except when converting 55 // words and characters in the URL history database except when converting
55 // URL strings to lowercase. Multi-byte-edness makes no difference when 56 // URL strings to lowercase. Multi-byte-edness makes no difference when
56 // indexing or when searching the index as the final filtering of results 57 // indexing or when searching the index as the final filtering of results
57 // is dependent on the comparison of a string of bytes, not individual 58 // is dependent on the comparison of a string of bytes, not individual
58 // characters. While the lookup of those bytes during a search in the 59 // characters. While the lookup of those bytes during a search in the
59 // |char_word_map_| could serve up words in which the individual char16 60 // |char_word_map_| could serve up words in which the individual char16
60 // occurs as a portion of a composite character the next filtering step 61 // occurs as a portion of a composite character the next filtering step
61 // will eliminate such words except in the case where a single character 62 // will eliminate such words except in the case where a single character
62 // is being searched on and which character occurs as the second char16 of a 63 // is being searched on and which character occurs as the second char16 of a
63 // multi-char16 instance. 64 // multi-char16 instance.
64 class InMemoryURLIndex { 65 class InMemoryURLIndex : public content::NotificationObserver {
65 public: 66 public:
66 // |history_dir| is a path to the directory containing the history database 67 // |history_dir| is a path to the directory containing the history database
67 // within the profile wherein the cache and transaction journals will be 68 // within the profile wherein the cache and transaction journals will be
68 // stored. 69 // stored.
69 explicit InMemoryURLIndex(const FilePath& history_dir); 70 explicit InMemoryURLIndex(Profile* profile,
Peter Kasting 2011/11/21 19:07:50 Nit: "explicit" no longer needed
mrossetti 2011/11/21 21:53:25 Done.
71 const FilePath& history_dir);
70 virtual ~InMemoryURLIndex(); 72 virtual ~InMemoryURLIndex();
71 73
72 // Opens and indexes the URL history database. 74 // Restores our index from its cache, if possible. If the cache is not
75 // available then we will register for the NOTIFICATION_HISTORY_LOADED
76 // notifications and then rebuild the index from the history database.
73 // |languages| gives a list of language encodings with which the history 77 // |languages| gives a list of language encodings with which the history
74 // URLs and omnibox searches are interpreted, i.e. when each is broken 78 // URLs and omnibox searches are interpreted, i.e. when each is broken
75 // down into words and each word is broken down into characters. 79 // down into words and each word is broken down into characters.
76 bool Init(URLDatabase* history_db, const std::string& languages); 80 void Init(const std::string& languages);
77 81
78 // Reloads the history index. Attempts to reload from the cache unless 82 // Reloads the history index from the history database given in |history_db|.
79 // |clear_cache| is true. If the cache is unavailable then reload the 83 void ReloadFromHistory(URLDatabase* history_db);
80 // index from |history_db|.
81 bool ReloadFromHistory(URLDatabase* history_db, bool clear_cache);
82 84
83 // Signals that any outstanding initialization should be canceled and 85 // Signals that any outstanding initialization should be canceled and
84 // flushes the cache to disk. 86 // flushes the cache to disk.
85 void ShutDown(); 87 void ShutDown();
86 88
87 // Restores the index's private data from the cache file stored in the
88 // profile directory and returns true if successful.
89 bool RestoreFromCacheFile();
90
91 // Caches the index private data and writes the cache file to the profile 89 // Caches the index private data and writes the cache file to the profile
92 // directory. 90 // directory.
93 bool SaveToCacheFile(); 91 bool SaveToCacheFile();
94 92
95 // Given a vector containing one or more words as string16s, scans the 93 // Given a vector containing one or more words as string16s, scans the
96 // history index and returns a vector with all scored, matching history items. 94 // history index and returns a vector with all scored, matching history items.
97 // Each term must occur somewhere in the history item's URL or page title for 95 // Each term must occur somewhere in the history item's URL or page title for
98 // the item to qualify; however, the terms do not necessarily have to be 96 // the item to qualify; however, the terms do not necessarily have to be
99 // adjacent. Results are sorted with higher scoring items first. Each term 97 // adjacent. Results are sorted with higher scoring items first. Each term
100 // from |terms| may contain punctuation but should not contain spaces. 98 // from |terms| may contain punctuation but should not contain spaces.
101 // A search request which results in more than |kItemsToScoreLimit| total 99 // A search request which results in more than |kItemsToScoreLimit| total
102 // candidate items returns no matches (though the results set will be 100 // candidate items returns no matches (though the results set will be
103 // retained and used for subsequent calls to this function) as the scoring 101 // retained and used for subsequent calls to this function) as the scoring
104 // of such a large number of candidates may cause perceptible typing response 102 // of such a large number of candidates may cause perceptible typing response
105 // delays in the omnibox. This is likely to occur for short omnibox terms 103 // delays in the omnibox. This is likely to occur for short omnibox terms
106 // such as 'h' and 'w' which will be found in nearly all history candidates. 104 // such as 'h' and 'w' which will be found in nearly all history candidates.
107 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms); 105 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms);
108 106
109 // Updates or adds a history item to the index if it meets the minimum 107 // Updates or adds a history item to the index if it meets the minimum
110 // 'quick' criteria. 108 // selection criteria.
111 void UpdateURL(URLID row_id, const URLRow& row); 109 void UpdateURL(const URLRow& row);
112 110
113 // Deletes indexing data for a history item. The item may not have actually 111 // Deletes indexing data for a history item. The item may not have actually
114 // been indexed (which is the case if it did not previously meet minimum 112 // been indexed (which is the case if it did not previously meet minimum
115 // 'quick' criteria). 113 // 'quick' criteria).
116 void DeleteURL(URLID row_id); 114 void DeleteURL(const URLRow& row);
115
116 // Notification callback.
117 virtual void Observe(int type,
118 const content::NotificationSource& source,
119 const content::NotificationDetails& details);
117 120
118 private: 121 private:
119 friend class AddHistoryMatch; 122 friend class AddHistoryMatch;
120 friend class InMemoryURLIndexTest; 123 friend class InMemoryURLIndexTest;
121 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); 124 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization);
122 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); 125 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath);
123 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); 126 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore);
124 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); 127 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities);
125 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); 128 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets);
126 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); 129 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring);
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
179 void operator()(const HistoryID history_id); 182 void operator()(const HistoryID history_id);
180 183
181 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } 184 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; }
182 185
183 private: 186 private:
184 const InMemoryURLIndex& index_; 187 const InMemoryURLIndex& index_;
185 ScoredHistoryMatches scored_matches_; 188 ScoredHistoryMatches scored_matches_;
186 const String16Vector& lower_terms_; 189 const String16Vector& lower_terms_;
187 }; 190 };
188 191
192 // Initialization and Restoration --------------------------------------------
193
194 // Restores the index's private data from the cache, if possible, otherwise
195 // register to be notified when the history database becomes available.
Peter Kasting 2011/11/21 19:07:50 Nit: register -> registers
mrossetti 2011/11/21 21:53:25 Done.
196 void RestoreFromCache();
197
198 // Restores the index's private data from the cache file stored in the
199 // profile directory and returns true if successful.
200 bool RestoreFromCacheFile();
201
189 // Initializes all index data members in preparation for restoring the index 202 // Initializes all index data members in preparation for restoring the index
190 // from the cache or a complete rebuild from the history database. 203 // from the cache or a complete rebuild from the history database.
191 void ClearPrivateData(); 204 void ClearPrivateData();
192 205
193 // Initializes the whitelist of URL schemes. 206 // Initializes the whitelist of URL schemes.
194 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); 207 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist);
195 208
196 // URL History indexing support functions. 209 // URL History Indexing ------------------------------------------------------
197 210
198 // Indexes one URL history item. 211 // Indexes one URL history item.
199 void IndexRow(const URLRow& row); 212 void IndexRow(const URLRow& row);
200 213
201 // Parses and indexes the words in the URL and page title of |row|. 214 // Parses and indexes the words in the URL and page title of |row|.
202 void AddRowWordsToIndex(const URLRow& row); 215 void AddRowWordsToIndex(const URLRow& row);
203 216
204 // Removes |row| and all associated words and characters from the index. 217 // Removes |row| and all associated words and characters from the index.
205 void RemoveRowFromIndex(const URLRow& row); 218 void RemoveRowFromIndex(const URLRow& row);
206 219
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
246 259
247 // Calculates a component score based on position, ordering and total 260 // Calculates a component score based on position, ordering and total
248 // substring match size using metrics recorded in |matches|. |max_length| 261 // substring match size using metrics recorded in |matches|. |max_length|
249 // is the length of the string against which the terms are being searched. 262 // is the length of the string against which the terms are being searched.
250 static int ScoreComponentForMatches(const TermMatches& matches, 263 static int ScoreComponentForMatches(const TermMatches& matches,
251 size_t max_length); 264 size_t max_length);
252 265
253 // Determines if |gurl| has a whitelisted scheme and returns true if so. 266 // Determines if |gurl| has a whitelisted scheme and returns true if so.
254 bool URLSchemeIsWhitelisted(const GURL& gurl) const; 267 bool URLSchemeIsWhitelisted(const GURL& gurl) const;
255 268
269 // Notification handlers.
270 void OnURLVisited(const URLVisitedDetails* details);
271 void OnURLsModified(const URLsModifiedDetails* details);
272 void OnURLsDeleted(const URLsDeletedDetails* details);
273
256 // Utility functions supporting RestoreFromCache and SaveToCache. 274 // Utility functions supporting RestoreFromCache and SaveToCache.
257 275
258 // Construct a file path for the cache file within the same directory where 276 // Construct a file path for the cache file within the same directory where
259 // the history database is kept and saves that path to |file_path|. Returns 277 // the history database is kept and saves that path to |file_path|. Returns
260 // true if |file_path| can be successfully constructed. (This function 278 // true if |file_path| can be successfully constructed. (This function
261 // provided as a hook for unit testing.) 279 // provided as a hook for unit testing.)
262 bool GetCacheFilePath(FilePath* file_path); 280 bool GetCacheFilePath(FilePath* file_path);
263 281
264 // Encode a data structure into the protobuf |cache|. 282 // Encode a data structure into the protobuf |cache|.
265 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; 283 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const;
266 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; 284 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const;
267 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; 285 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const;
268 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; 286 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const;
269 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; 287 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const;
270 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; 288 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const;
271 289
272 // Decode a data structure from the protobuf |cache|. Return false if there 290 // Decode a data structure from the protobuf |cache|. Return false if there
273 // is any kind of failure. 291 // is any kind of failure.
274 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); 292 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache);
275 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); 293 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache);
276 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); 294 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache);
277 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); 295 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache);
278 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); 296 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache);
279 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); 297 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache);
280 298
299 content::NotificationRegistrar registrar_;
300
301 // The profile with which we are associated.
302 Profile* profile_;
303
281 // Directory where cache file resides. This is, except when unit testing, 304 // Directory where cache file resides. This is, except when unit testing,
282 // the same directory in which the profile's history database is found. It 305 // the same directory in which the profile's history database is found. It
283 // should never be empty. 306 // should never be empty.
284 FilePath history_dir_; 307 FilePath history_dir_;
285 308
286 // The timestamp of when the cache was last saved. This is used to validate 309 // The timestamp of when the cache was last saved. This is used to validate
287 // the transaction journal's applicability to the cache. The timestamp is 310 // the transaction journal's applicability to the cache. The timestamp is
288 // initialized to the NULL time, indicating that the cache was not used when 311 // initialized to the NULL time, indicating that the cache was not used when
289 // the InMemoryURLIndex was last populated. 312 // the InMemoryURLIndex was last populated.
290 base::Time last_saved_; 313 base::Time last_saved_;
(...skipping 16 matching lines...) Expand all
307 // TODO(mrossetti): Eliminate once the transition to SQLite has been done. 330 // TODO(mrossetti): Eliminate once the transition to SQLite has been done.
308 // http://crbug.com/83659 331 // http://crbug.com/83659
309 bool cached_at_shutdown_; 332 bool cached_at_shutdown_;
310 333
311 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); 334 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex);
312 }; 335 };
313 336
314 } // namespace history 337 } // namespace history
315 338
316 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ 339 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698