OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
7 #pragma once | 7 #pragma once |
8 | 8 |
9 #include <functional> | 9 #include <functional> |
10 #include <map> | 10 #include <map> |
11 #include <set> | 11 #include <set> |
12 #include <string> | 12 #include <string> |
13 #include <vector> | 13 #include <vector> |
14 | 14 |
15 #include "base/basictypes.h" | 15 #include "base/basictypes.h" |
16 #include "base/file_path.h" | 16 #include "base/file_path.h" |
17 #include "base/gtest_prod_util.h" | 17 #include "base/gtest_prod_util.h" |
18 #include "base/memory/linked_ptr.h" | 18 #include "base/memory/linked_ptr.h" |
19 #include "base/memory/scoped_ptr.h" | 19 #include "base/memory/scoped_ptr.h" |
20 #include "base/string16.h" | 20 #include "base/string16.h" |
21 #include "chrome/browser/autocomplete/autocomplete_match.h" | 21 #include "chrome/browser/autocomplete/autocomplete_match.h" |
22 #include "chrome/browser/autocomplete/history_provider_util.h" | 22 #include "chrome/browser/autocomplete/history_provider_util.h" |
23 #include "chrome/browser/history/history_types.h" | 23 #include "chrome/browser/history/history_types.h" |
24 #include "chrome/browser/history/in_memory_url_index_types.h" | 24 #include "chrome/browser/history/in_memory_url_index_types.h" |
25 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" | 25 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" |
| 26 #include "content/public/browser/notification_observer.h" |
| 27 #include "content/public/browser/notification_registrar.h" |
26 #include "sql/connection.h" | 28 #include "sql/connection.h" |
27 #include "testing/gtest/include/gtest/gtest_prod.h" | 29 #include "testing/gtest/include/gtest/gtest_prod.h" |
28 | 30 |
29 namespace base { | 31 class Profile; |
30 class Time; | |
31 } | |
32 | 32 |
33 namespace in_memory_url_index { | 33 namespace in_memory_url_index { |
34 class InMemoryURLIndexCacheItem; | 34 class InMemoryURLIndexCacheItem; |
35 } | 35 } |
36 | 36 |
37 namespace history { | 37 namespace history { |
38 | 38 |
39 namespace imui = in_memory_url_index; | 39 namespace imui = in_memory_url_index; |
40 | 40 |
41 class URLDatabase; | 41 class URLDatabase; |
| 42 struct URLsDeletedDetails; |
| 43 struct URLsModifiedDetails; |
| 44 struct URLVisitedDetails; |
42 | 45 |
43 // The URL history source. | 46 // The URL history source. |
44 // Holds portions of the URL database in memory in an indexed form. Used to | 47 // Holds portions of the URL database in memory in an indexed form. Used to |
45 // quickly look up matching URLs for a given query string. Used by | 48 // quickly look up matching URLs for a given query string. Used by |
46 // the HistoryURLProvider for inline autocomplete and to provide URL | 49 // the HistoryURLProvider for inline autocomplete and to provide URL |
47 // matches to the omnibox. | 50 // matches to the omnibox. |
48 // | 51 // |
49 // Note about multi-byte codepoints and the data structures in the | 52 // Note about multi-byte codepoints and the data structures in the |
50 // InMemoryURLIndex class: One will quickly notice that no effort is made to | 53 // InMemoryURLIndex class: One will quickly notice that no effort is made to |
51 // ensure that multi-byte character boundaries are detected when indexing the | 54 // ensure that multi-byte character boundaries are detected when indexing the |
52 // words and characters in the URL history database except when converting | 55 // words and characters in the URL history database except when converting |
53 // URL strings to lowercase. Multi-byte-edness makes no difference when | 56 // URL strings to lowercase. Multi-byte-edness makes no difference when |
54 // indexing or when searching the index as the final filtering of results | 57 // indexing or when searching the index as the final filtering of results |
55 // is dependent on the comparison of a string of bytes, not individual | 58 // is dependent on the comparison of a string of bytes, not individual |
56 // characters. While the lookup of those bytes during a search in the | 59 // characters. While the lookup of those bytes during a search in the |
57 // |char_word_map_| could serve up words in which the individual char16 | 60 // |char_word_map_| could serve up words in which the individual char16 |
58 // occurs as a portion of a composite character the next filtering step | 61 // occurs as a portion of a composite character the next filtering step |
59 // will eliminate such words except in the case where a single character | 62 // will eliminate such words except in the case where a single character |
60 // is being searched on and which character occurs as the second char16 of a | 63 // is being searched on and which character occurs as the second char16 of a |
61 // multi-char16 instance. | 64 // multi-char16 instance. |
62 class InMemoryURLIndex { | 65 class InMemoryURLIndex : public content::NotificationObserver { |
63 public: | 66 public: |
64 // |history_dir| is a path to the directory containing the history database | 67 // |history_dir| is a path to the directory containing the history database |
65 // within the profile wherein the cache and transaction journals will be | 68 // within the profile wherein the cache and transaction journals will be |
66 // stored. | 69 // stored. |
67 explicit InMemoryURLIndex(const FilePath& history_dir); | 70 InMemoryURLIndex(Profile* profile, const FilePath& history_dir); |
68 virtual ~InMemoryURLIndex(); | 71 virtual ~InMemoryURLIndex(); |
69 | 72 |
70 // Opens and indexes the URL history database. | 73 // Restores our index from its cache, if possible. If the cache is not |
| 74 // available then we will register for the NOTIFICATION_HISTORY_LOADED |
| 75 // notifications and then rebuild the index from the history database. |
71 // |languages| gives a list of language encodings with which the history | 76 // |languages| gives a list of language encodings with which the history |
72 // URLs and omnibox searches are interpreted, i.e. when each is broken | 77 // URLs and omnibox searches are interpreted, i.e. when each is broken |
73 // down into words and each word is broken down into characters. | 78 // down into words and each word is broken down into characters. |
74 bool Init(URLDatabase* history_db, const std::string& languages); | 79 void Init(const std::string& languages); |
75 | 80 |
76 // Reloads the history index. Attempts to reload from the cache unless | 81 // Reloads the history index from the history database given in |history_db|. |
77 // |clear_cache| is true. If the cache is unavailable then reload the | 82 void ReloadFromHistory(URLDatabase* history_db); |
78 // index from |history_db|. | |
79 bool ReloadFromHistory(URLDatabase* history_db, bool clear_cache); | |
80 | 83 |
81 // Signals that any outstanding initialization should be canceled and | 84 // Signals that any outstanding initialization should be canceled and |
82 // flushes the cache to disk. | 85 // flushes the cache to disk. |
83 void ShutDown(); | 86 void ShutDown(); |
84 | 87 |
85 // Restores the index's private data from the cache file stored in the | |
86 // profile directory and returns true if successful. | |
87 bool RestoreFromCacheFile(); | |
88 | |
89 // Caches the index private data and writes the cache file to the profile | 88 // Caches the index private data and writes the cache file to the profile |
90 // directory. | 89 // directory. |
91 bool SaveToCacheFile(); | 90 bool SaveToCacheFile(); |
92 | 91 |
93 // Given a vector containing one or more words as string16s, scans the | 92 // Given a vector containing one or more words as string16s, scans the |
94 // history index and returns a vector with all scored, matching history items. | 93 // history index and returns a vector with all scored, matching history items. |
95 // Each term must occur somewhere in the history item's URL or page title for | 94 // Each term must occur somewhere in the history item's URL or page title for |
96 // the item to qualify; however, the terms do not necessarily have to be | 95 // the item to qualify; however, the terms do not necessarily have to be |
97 // adjacent. Results are sorted with higher scoring items first. Each term | 96 // adjacent. Results are sorted with higher scoring items first. Each term |
98 // from |terms| may contain punctuation but should not contain spaces. | 97 // from |terms| may contain punctuation but should not contain spaces. |
99 // A search request which results in more than |kItemsToScoreLimit| total | 98 // A search request which results in more than |kItemsToScoreLimit| total |
100 // candidate items returns no matches (though the results set will be | 99 // candidate items returns no matches (though the results set will be |
101 // retained and used for subsequent calls to this function) as the scoring | 100 // retained and used for subsequent calls to this function) as the scoring |
102 // of such a large number of candidates may cause perceptible typing response | 101 // of such a large number of candidates may cause perceptible typing response |
103 // delays in the omnibox. This is likely to occur for short omnibox terms | 102 // delays in the omnibox. This is likely to occur for short omnibox terms |
104 // such as 'h' and 'w' which will be found in nearly all history candidates. | 103 // such as 'h' and 'w' which will be found in nearly all history candidates. |
105 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms); | 104 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms); |
106 | 105 |
107 // Updates or adds a history item to the index if it meets the minimum | 106 // Updates or adds a history item to the index if it meets the minimum |
108 // 'quick' criteria. | 107 // selection criteria. |
109 void UpdateURL(URLID row_id, const URLRow& row); | 108 void UpdateURL(const URLRow& row); |
110 | 109 |
111 // Deletes indexing data for a history item. The item may not have actually | 110 // Deletes indexing data for a history item. The item may not have actually |
112 // been indexed (which is the case if it did not previously meet minimum | 111 // been indexed (which is the case if it did not previously meet minimum |
113 // 'quick' criteria). | 112 // 'quick' criteria). |
114 void DeleteURL(URLID row_id); | 113 void DeleteURL(const URLRow& row); |
| 114 |
| 115 // Notification callback. |
| 116 virtual void Observe(int type, |
| 117 const content::NotificationSource& source, |
| 118 const content::NotificationDetails& details); |
115 | 119 |
116 private: | 120 private: |
117 friend class AddHistoryMatch; | 121 friend class AddHistoryMatch; |
118 friend class InMemoryURLIndexTest; | 122 friend class InMemoryURLIndexTest; |
119 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); | 123 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); |
120 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); | 124 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); |
121 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); | 125 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
122 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); | 126 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); |
123 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); | 127 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); |
124 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); | 128 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
177 void operator()(const HistoryID history_id); | 181 void operator()(const HistoryID history_id); |
178 | 182 |
179 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } | 183 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } |
180 | 184 |
181 private: | 185 private: |
182 const InMemoryURLIndex& index_; | 186 const InMemoryURLIndex& index_; |
183 ScoredHistoryMatches scored_matches_; | 187 ScoredHistoryMatches scored_matches_; |
184 const String16Vector& lower_terms_; | 188 const String16Vector& lower_terms_; |
185 }; | 189 }; |
186 | 190 |
| 191 // Initialization and Restoration -------------------------------------------- |
| 192 |
| 193 // Restores the index's private data from the cache, if possible, otherwise |
| 194 // registers to be notified when the history database becomes available. |
| 195 void RestoreFromCache(); |
| 196 |
| 197 // Restores the index's private data from the cache file stored in the |
| 198 // profile directory and returns true if successful. |
| 199 bool RestoreFromCacheFile(); |
| 200 |
187 // Initializes all index data members in preparation for restoring the index | 201 // Initializes all index data members in preparation for restoring the index |
188 // from the cache or a complete rebuild from the history database. | 202 // from the cache or a complete rebuild from the history database. |
189 void ClearPrivateData(); | 203 void ClearPrivateData(); |
190 | 204 |
191 // Initializes the whitelist of URL schemes. | 205 // Initializes the whitelist of URL schemes. |
192 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); | 206 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); |
193 | 207 |
194 // URL History indexing support functions. | 208 // URL History Indexing ------------------------------------------------------ |
195 | 209 |
196 // Indexes one URL history item. | 210 // Indexes one URL history item. |
197 void IndexRow(const URLRow& row); | 211 void IndexRow(const URLRow& row); |
198 | 212 |
199 // Parses and indexes the words in the URL and page title of |row|. | 213 // Parses and indexes the words in the URL and page title of |row|. |
200 void AddRowWordsToIndex(const URLRow& row); | 214 void AddRowWordsToIndex(const URLRow& row); |
201 | 215 |
202 // Removes |row| and all associated words and characters from the index. | 216 // Removes |row| and all associated words and characters from the index. |
203 void RemoveRowFromIndex(const URLRow& row); | 217 void RemoveRowFromIndex(const URLRow& row); |
204 | 218 |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
244 | 258 |
245 // Calculates a component score based on position, ordering and total | 259 // Calculates a component score based on position, ordering and total |
246 // substring match size using metrics recorded in |matches|. |max_length| | 260 // substring match size using metrics recorded in |matches|. |max_length| |
247 // is the length of the string against which the terms are being searched. | 261 // is the length of the string against which the terms are being searched. |
248 static int ScoreComponentForMatches(const TermMatches& matches, | 262 static int ScoreComponentForMatches(const TermMatches& matches, |
249 size_t max_length); | 263 size_t max_length); |
250 | 264 |
251 // Determines if |gurl| has a whitelisted scheme and returns true if so. | 265 // Determines if |gurl| has a whitelisted scheme and returns true if so. |
252 bool URLSchemeIsWhitelisted(const GURL& gurl) const; | 266 bool URLSchemeIsWhitelisted(const GURL& gurl) const; |
253 | 267 |
| 268 // Notification handlers. |
| 269 void OnURLVisited(const URLVisitedDetails* details); |
| 270 void OnURLsModified(const URLsModifiedDetails* details); |
| 271 void OnURLsDeleted(const URLsDeletedDetails* details); |
| 272 |
254 // Utility functions supporting RestoreFromCacheFile and SaveToCacheFile. | 273 // Utility functions supporting RestoreFromCacheFile and SaveToCacheFile. |
255 | 274 |
256 // Construct a file path for the cache file within the same directory where | 275 // Construct a file path for the cache file within the same directory where |
257 // the history database is kept and saves that path to |file_path|. Returns | 276 // the history database is kept and saves that path to |file_path|. Returns |
258 // true if |file_path| can be successfully constructed. (This function | 277 // true if |file_path| can be successfully constructed. (This function |
259 // provided as a hook for unit testing.) | 278 // provided as a hook for unit testing.) |
260 bool GetCacheFilePath(FilePath* file_path); | 279 bool GetCacheFilePath(FilePath* file_path); |
261 | 280 |
262 // Encode a data structure into the protobuf |cache|. | 281 // Encode a data structure into the protobuf |cache|. |
263 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; | 282 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; |
264 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; | 283 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; |
265 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 284 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
266 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 285 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
267 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; | 286 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; |
268 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; | 287 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; |
269 | 288 |
270 // Decode a data structure from the protobuf |cache|. Return false if there | 289 // Decode a data structure from the protobuf |cache|. Return false if there |
271 // is any kind of failure. | 290 // is any kind of failure. |
272 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); | 291 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); |
273 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); | 292 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); |
274 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 293 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
275 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 294 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
276 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); | 295 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); |
277 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); | 296 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); |
278 | 297 |
| 298 content::NotificationRegistrar registrar_; |
| 299 |
| 300 // The profile with which we are associated. |
| 301 Profile* profile_; |
| 302 |
279 // Directory where cache file resides. This is, except when unit testing, | 303 // Directory where cache file resides. This is, except when unit testing, |
280 // the same directory in which the profile's history database is found. It | 304 // the same directory in which the profile's history database is found. It |
281 // should never be empty. | 305 // should never be empty. |
282 FilePath history_dir_; | 306 FilePath history_dir_; |
283 | 307 |
284 // The timestamp of when the cache was last saved. This is used to validate | 308 // The timestamp of when the cache was last saved. This is used to validate |
285 // the transaction journal's applicability to the cache. The timestamp is | 309 // the transaction journal's applicability to the cache. The timestamp is |
286 // initialized to the NULL time, indicating that the cache was not used when | 310 // initialized to the NULL time, indicating that the cache was not used when |
287 // the InMemoryURLIndex was last populated. | 311 // the InMemoryURLIndex was last populated. |
288 base::Time last_saved_; | 312 base::Time last_saved_; |
(...skipping 16 matching lines...) Expand all Loading... |
305 // TODO(mrossetti): Eliminate once the transition to SQLite has been done. | 329 // TODO(mrossetti): Eliminate once the transition to SQLite has been done. |
306 // http://crbug.com/83659 | 330 // http://crbug.com/83659 |
307 bool cached_at_shutdown_; | 331 bool cached_at_shutdown_; |
308 | 332 |
309 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); | 333 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); |
310 }; | 334 }; |
311 | 335 |
312 } // namespace history | 336 } // namespace history |
313 | 337 |
314 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 338 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
OLD | NEW |