Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(71)

Side by Side Diff: chrome/browser/history/in_memory_url_index.h

Issue 8451009: HQP Refactoring (in Preparation for SQLite Cache) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: '' Created 9 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
7 #pragma once 7 #pragma once
8 8
9 #include <functional> 9 #include <functional>
10 #include <map> 10 #include <map>
11 #include <set> 11 #include <set>
12 #include <string> 12 #include <string>
13 #include <vector> 13 #include <vector>
14 14
15 #include "base/basictypes.h" 15 #include "base/basictypes.h"
16 #include "base/file_path.h" 16 #include "base/file_path.h"
17 #include "base/gtest_prod_util.h" 17 #include "base/gtest_prod_util.h"
18 #include "base/memory/linked_ptr.h" 18 #include "base/memory/linked_ptr.h"
19 #include "base/memory/scoped_ptr.h" 19 #include "base/memory/scoped_ptr.h"
20 #include "base/string16.h" 20 #include "base/string16.h"
21 #include "chrome/browser/autocomplete/autocomplete_match.h" 21 #include "chrome/browser/autocomplete/autocomplete_match.h"
22 #include "chrome/browser/autocomplete/history_provider_util.h" 22 #include "chrome/browser/autocomplete/history_provider_util.h"
23 #include "chrome/browser/history/history_types.h" 23 #include "chrome/browser/history/history_types.h"
24 #include "chrome/browser/history/in_memory_url_index_types.h" 24 #include "chrome/browser/history/in_memory_url_index_types.h"
25 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" 25 #include "chrome/browser/history/in_memory_url_index_cache.pb.h"
26 #include "content/public/browser/notification_observer.h"
27 #include "content/public/browser/notification_registrar.h"
26 #include "sql/connection.h" 28 #include "sql/connection.h"
27 #include "testing/gtest/include/gtest/gtest_prod.h" 29 #include "testing/gtest/include/gtest/gtest_prod.h"
28 30
29 namespace base { 31 class Profile;
30 class Time;
31 }
32 32
33 namespace in_memory_url_index { 33 namespace in_memory_url_index {
34 class InMemoryURLIndexCacheItem; 34 class InMemoryURLIndexCacheItem;
35 } 35 }
36 36
37 namespace history { 37 namespace history {
38 38
39 namespace imui = in_memory_url_index; 39 namespace imui = in_memory_url_index;
40 40
41 class URLDatabase; 41 class URLDatabase;
42 struct URLsDeletedDetails;
43 struct URLsModifiedDetails;
44 struct URLVisitedDetails;
42 45
43 // The URL history source. 46 // The URL history source.
44 // Holds portions of the URL database in memory in an indexed form. Used to 47 // Holds portions of the URL database in memory in an indexed form. Used to
45 // quickly look up matching URLs for a given query string. Used by 48 // quickly look up matching URLs for a given query string. Used by
46 // the HistoryURLProvider for inline autocomplete and to provide URL 49 // the HistoryURLProvider for inline autocomplete and to provide URL
47 // matches to the omnibox. 50 // matches to the omnibox.
48 // 51 //
49 // Note about multi-byte codepoints and the data structures in the 52 // Note about multi-byte codepoints and the data structures in the
50 // InMemoryURLIndex class: One will quickly notice that no effort is made to 53 // InMemoryURLIndex class: One will quickly notice that no effort is made to
51 // ensure that multi-byte character boundaries are detected when indexing the 54 // ensure that multi-byte character boundaries are detected when indexing the
52 // words and characters in the URL history database except when converting 55 // words and characters in the URL history database except when converting
53 // URL strings to lowercase. Multi-byte-edness makes no difference when 56 // URL strings to lowercase. Multi-byte-edness makes no difference when
54 // indexing or when searching the index as the final filtering of results 57 // indexing or when searching the index as the final filtering of results
55 // is dependent on the comparison of a string of bytes, not individual 58 // is dependent on the comparison of a string of bytes, not individual
56 // characters. While the lookup of those bytes during a search in the 59 // characters. While the lookup of those bytes during a search in the
57 // |char_word_map_| could serve up words in which the individual char16 60 // |char_word_map_| could serve up words in which the individual char16
58 // occurs as a portion of a composite character the next filtering step 61 // occurs as a portion of a composite character the next filtering step
59 // will eliminate such words except in the case where a single character 62 // will eliminate such words except in the case where a single character
60 // is being searched on and which character occurs as the second char16 of a 63 // is being searched on and which character occurs as the second char16 of a
61 // multi-char16 instance. 64 // multi-char16 instance.
62 class InMemoryURLIndex { 65 class InMemoryURLIndex : public content::NotificationObserver {
63 public: 66 public:
64 // |history_dir| is a path to the directory containing the history database 67 // |history_dir| is a path to the directory containing the history database
65 // within the profile wherein the cache and transaction journals will be 68 // within the profile wherein the cache and transaction journals will be
66 // stored. 69 // stored.
67 explicit InMemoryURLIndex(const FilePath& history_dir); 70 InMemoryURLIndex(Profile* profile, const FilePath& history_dir);
68 virtual ~InMemoryURLIndex(); 71 virtual ~InMemoryURLIndex();
69 72
70 // Opens and indexes the URL history database. 73 // Restores our index from its cache, if possible. If the cache is not
74 // available then we will register for the NOTIFICATION_HISTORY_LOADED
75 // notifications and then rebuild the index from the history database.
71 // |languages| gives a list of language encodings with which the history 76 // |languages| gives a list of language encodings with which the history
72 // URLs and omnibox searches are interpreted, i.e. when each is broken 77 // URLs and omnibox searches are interpreted, i.e. when each is broken
73 // down into words and each word is broken down into characters. 78 // down into words and each word is broken down into characters.
74 bool Init(URLDatabase* history_db, const std::string& languages); 79 void Init(const std::string& languages);
75 80
76 // Reloads the history index. Attempts to reload from the cache unless 81 // Reloads the history index from the history database given in |history_db|.
77 // |clear_cache| is true. If the cache is unavailable then reload the 82 void ReloadFromHistory(URLDatabase* history_db);
78 // index from |history_db|.
79 bool ReloadFromHistory(URLDatabase* history_db, bool clear_cache);
80 83
81 // Signals that any outstanding initialization should be canceled and 84 // Signals that any outstanding initialization should be canceled and
82 // flushes the cache to disk. 85 // flushes the cache to disk.
83 void ShutDown(); 86 void ShutDown();
84 87
85 // Restores the index's private data from the cache file stored in the
86 // profile directory and returns true if successful.
87 bool RestoreFromCacheFile();
88
89 // Caches the index private data and writes the cache file to the profile 88 // Caches the index private data and writes the cache file to the profile
90 // directory. 89 // directory.
91 bool SaveToCacheFile(); 90 bool SaveToCacheFile();
92 91
93 // Given a vector containing one or more words as string16s, scans the 92 // Given a vector containing one or more words as string16s, scans the
94 // history index and return a vector with all scored, matching history items. 93 // history index and return a vector with all scored, matching history items.
95 // Each term must occur somewhere in the history item's URL or page title for 94 // Each term must occur somewhere in the history item's URL or page title for
96 // the item to qualify; however, the terms do not necessarily have to be 95 // the item to qualify; however, the terms do not necessarily have to be
97 // adjacent. Results are sorted with higher scoring items first. Each term 96 // adjacent. Results are sorted with higher scoring items first. Each term
98 // from |terms| may contain punctuation but should not contain spaces. 97 // from |terms| may contain punctuation but should not contain spaces.
99 // A search request which results in more than |kItemsToScoreLimit| total 98 // A search request which results in more than |kItemsToScoreLimit| total
100 // candidate items returns no matches (though the results set will be 99 // candidate items returns no matches (though the results set will be
101 // retained and used for subsequent calls to this function) as the scoring 100 // retained and used for subsequent calls to this function) as the scoring
102 // of such a large number of candidates may cause perceptible typing response 101 // of such a large number of candidates may cause perceptible typing response
103 // delays in the omnibox. This is likely to occur for short omnibox terms 102 // delays in the omnibox. This is likely to occur for short omnibox terms
104 // such as 'h' and 'w' which will be found in nearly all history candidates. 103 // such as 'h' and 'w' which will be found in nearly all history candidates.
105 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms); 104 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms);
106 105
107 // Updates or adds a history item to the index if it meets the minimum 106 // Updates or adds a history item to the index if it meets the minimum
108 // 'quick' criteria. 107 // selection criteria.
109 void UpdateURL(URLID row_id, const URLRow& row); 108 void UpdateURL(const URLRow& row);
110 109
111 // Deletes indexing data for a history item. The item may not have actually 110 // Deletes indexing data for a history item. The item may not have actually
112 // been indexed (which is the case if it did not previously meet minimum 111 // been indexed (which is the case if it did not previously meet minimum
113 // 'quick' criteria). 112 // 'quick' criteria).
114 void DeleteURL(URLID row_id); 113 void DeleteURL(const URLRow& row);
114
115 // Notification callback.
116 virtual void Observe(int type,
117 const content::NotificationSource& source,
118 const content::NotificationDetails& details);
115 119
116 private: 120 private:
117 friend class AddHistoryMatch; 121 friend class AddHistoryMatch;
118 friend class InMemoryURLIndexTest; 122 friend class InMemoryURLIndexTest;
119 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); 123 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization);
120 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); 124 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath);
121 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); 125 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore);
122 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); 126 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities);
123 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); 127 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets);
124 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); 128 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring);
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
177 void operator()(const HistoryID history_id); 181 void operator()(const HistoryID history_id);
178 182
179 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } 183 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; }
180 184
181 private: 185 private:
182 const InMemoryURLIndex& index_; 186 const InMemoryURLIndex& index_;
183 ScoredHistoryMatches scored_matches_; 187 ScoredHistoryMatches scored_matches_;
184 const String16Vector& lower_terms_; 188 const String16Vector& lower_terms_;
185 }; 189 };
186 190
191 // Initialization and Restoration --------------------------------------------
192
193 // Restores the index's private data from the cache, if possible, otherwise
194 // registers to be notified when the history database becomes available.
195 void RestoreFromCache();
196
197 // Restores the index's private data from the cache file stored in the
198 // profile directory and returns true if successful.
199 bool RestoreFromCacheFile();
200
187 // Initializes all index data members in preparation for restoring the index 201 // Initializes all index data members in preparation for restoring the index
188 // from the cache or a complete rebuild from the history database. 202 // from the cache or a complete rebuild from the history database.
189 void ClearPrivateData(); 203 void ClearPrivateData();
190 204
191 // Initializes the whitelist of URL schemes. 205 // Initializes the whitelist of URL schemes.
192 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); 206 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist);
193 207
194 // URL History indexing support functions. 208 // URL History Indexing ------------------------------------------------------
195 209
196 // Indexes one URL history item. 210 // Indexes one URL history item.
197 void IndexRow(const URLRow& row); 211 void IndexRow(const URLRow& row);
198 212
199 // Parses and indexes the words in the URL and page title of |row|. 213 // Parses and indexes the words in the URL and page title of |row|.
200 void AddRowWordsToIndex(const URLRow& row); 214 void AddRowWordsToIndex(const URLRow& row);
201 215
202 // Removes |row| and all associated words and characters from the index. 216 // Removes |row| and all associated words and characters from the index.
203 void RemoveRowFromIndex(const URLRow& row); 217 void RemoveRowFromIndex(const URLRow& row);
204 218
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
244 258
245 // Calculates a component score based on position, ordering and total 259 // Calculates a component score based on position, ordering and total
246 // substring match size using metrics recorded in |matches|. |max_length| 260 // substring match size using metrics recorded in |matches|. |max_length|
247 // is the length of the string against which the terms are being searched. 261 // is the length of the string against which the terms are being searched.
248 static int ScoreComponentForMatches(const TermMatches& matches, 262 static int ScoreComponentForMatches(const TermMatches& matches,
249 size_t max_length); 263 size_t max_length);
250 264
251 // Determines if |gurl| has a whitelisted scheme and returns true if so. 265 // Determines if |gurl| has a whitelisted scheme and returns true if so.
252 bool URLSchemeIsWhitelisted(const GURL& gurl) const; 266 bool URLSchemeIsWhitelisted(const GURL& gurl) const;
253 267
268 // Notification handlers.
269 void OnURLVisited(const URLVisitedDetails* details);
270 void OnURLsModified(const URLsModifiedDetails* details);
271 void OnURLsDeleted(const URLsDeletedDetails* details);
272
254 // Utility functions supporting RestoreFromCache and SaveToCache. 273 // Utility functions supporting RestoreFromCache and SaveToCache.
255 274
256 // Construct a file path for the cache file within the same directory where 275 // Construct a file path for the cache file within the same directory where
257 // the history database is kept and saves that path to |file_path|. Returns 276 // the history database is kept and saves that path to |file_path|. Returns
258 // true if |file_path| can be successfully constructed. (This function 277 // true if |file_path| can be successfully constructed. (This function
259 // provided as a hook for unit testing.) 278 // provided as a hook for unit testing.)
260 bool GetCacheFilePath(FilePath* file_path); 279 bool GetCacheFilePath(FilePath* file_path);
261 280
262 // Encode a data structure into the protobuf |cache|. 281 // Encode a data structure into the protobuf |cache|.
263 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; 282 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const;
264 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; 283 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const;
265 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; 284 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const;
266 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; 285 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const;
267 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; 286 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const;
268 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; 287 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const;
269 288
270 // Decode a data structure from the protobuf |cache|. Return false if there 289 // Decode a data structure from the protobuf |cache|. Return false if there
271 // is any kind of failure. 290 // is any kind of failure.
272 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); 291 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache);
273 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); 292 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache);
274 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); 293 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache);
275 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); 294 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache);
276 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); 295 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache);
277 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); 296 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache);
278 297
298 content::NotificationRegistrar registrar_;
299
300 // The profile with which we are associated.
301 Profile* profile_;
302
279 // Directory where cache file resides. This is, except when unit testing, 303 // Directory where cache file resides. This is, except when unit testing,
280 // the same directory in which the profile's history database is found. It 304 // the same directory in which the profile's history database is found. It
281 // should never be empty. 305 // should never be empty.
282 FilePath history_dir_; 306 FilePath history_dir_;
283 307
284 // The timestamp of when the cache was last saved. This is used to validate 308 // The timestamp of when the cache was last saved. This is used to validate
285 // the transaction journal's applicability to the cache. The timestamp is 309 // the transaction journal's applicability to the cache. The timestamp is
286 // initialized to the NULL time, indicating that the cache was not used when 310 // initialized to the NULL time, indicating that the cache was not used when
287 // the InMemoryURLIndex was last populated. 311 // the InMemoryURLIndex was last populated.
288 base::Time last_saved_; 312 base::Time last_saved_;
(...skipping 16 matching lines...) Expand all
305 // TODO(mrossetti): Eliminate once the transition to SQLite has been done. 329 // TODO(mrossetti): Eliminate once the transition to SQLite has been done.
306 // http://crbug.com/83659 330 // http://crbug.com/83659
307 bool cached_at_shutdown_; 331 bool cached_at_shutdown_;
308 332
309 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); 333 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex);
310 }; 334 };
311 335
312 } // namespace history 336 } // namespace history
313 337
314 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ 338 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
OLDNEW
« no previous file with comments | « chrome/browser/history/in_memory_history_backend.cc ('k') | chrome/browser/history/in_memory_url_index.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698