Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| 7 #pragma once | 7 #pragma once |
| 8 | 8 |
| 9 #include <functional> | 9 #include <functional> |
| 10 #include <map> | 10 #include <map> |
| 11 #include <set> | 11 #include <set> |
| 12 #include <string> | 12 #include <string> |
| 13 #include <vector> | 13 #include <vector> |
| 14 | 14 |
| 15 #include "base/basictypes.h" | 15 #include "base/basictypes.h" |
| 16 #include "base/file_path.h" | 16 #include "base/file_path.h" |
| 17 #include "base/gtest_prod_util.h" | 17 #include "base/gtest_prod_util.h" |
| 18 #include "base/memory/linked_ptr.h" | 18 #include "base/memory/linked_ptr.h" |
| 19 #include "base/memory/scoped_ptr.h" | 19 #include "base/memory/scoped_ptr.h" |
| 20 #include "base/string16.h" | 20 #include "base/string16.h" |
| 21 #include "chrome/browser/autocomplete/autocomplete_match.h" | 21 #include "chrome/browser/autocomplete/autocomplete_match.h" |
| 22 #include "chrome/browser/autocomplete/history_provider_util.h" | 22 #include "chrome/browser/autocomplete/history_provider_util.h" |
| 23 #include "chrome/browser/history/history_types.h" | 23 #include "chrome/browser/history/history_types.h" |
| 24 #include "chrome/browser/history/in_memory_url_index_types.h" | 24 #include "chrome/browser/history/in_memory_url_index_types.h" |
| 25 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" | 25 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" |
| 26 #include "content/public/browser/notification_observer.h" | |
| 27 #include "content/public/browser/notification_registrar.h" | |
| 26 #include "sql/connection.h" | 28 #include "sql/connection.h" |
| 27 #include "testing/gtest/include/gtest/gtest_prod.h" | 29 #include "testing/gtest/include/gtest/gtest_prod.h" |
| 28 | 30 |
| 29 class Profile; | 31 class Profile; |
| 30 | 32 |
| 31 namespace base { | |
| 32 class Time; | |
| 33 } | |
| 34 | |
| 35 namespace in_memory_url_index { | 33 namespace in_memory_url_index { |
| 36 class InMemoryURLIndexCacheItem; | 34 class InMemoryURLIndexCacheItem; |
| 37 } | 35 } |
| 38 | 36 |
| 39 namespace history { | 37 namespace history { |
| 40 | 38 |
| 41 namespace imui = in_memory_url_index; | 39 namespace imui = in_memory_url_index; |
| 42 | 40 |
| 43 class URLDatabase; | 41 class URLDatabase; |
| 42 struct URLsDeletedDetails; | |
| 43 struct URLsModifiedDetails; | |
| 44 struct URLVisitedDetails; | |
| 44 | 45 |
| 45 // The URL history source. | 46 // The URL history source. |
| 46 // Holds portions of the URL database in memory in an indexed form. Used to | 47 // Holds portions of the URL database in memory in an indexed form. Used to |
| 47 // quickly look up matching URLs for a given query string. Used by | 48 // quickly look up matching URLs for a given query string. Used by |
| 48 // the HistoryURLProvider for inline autocomplete and to provide URL | 49 // the HistoryURLProvider for inline autocomplete and to provide URL |
| 49 // matches to the omnibox. | 50 // matches to the omnibox. |
| 50 // | 51 // |
| 51 // Note about multi-byte codepoints and the data structures in the | 52 // Note about multi-byte codepoints and the data structures in the |
| 52 // InMemoryURLIndex class: One will quickly notice that no effort is made to | 53 // InMemoryURLIndex class: One will quickly notice that no effort is made to |
| 53 // ensure that multi-byte character boundaries are detected when indexing the | 54 // ensure that multi-byte character boundaries are detected when indexing the |
| 54 // words and characters in the URL history database except when converting | 55 // words and characters in the URL history database except when converting |
| 55 // URL strings to lowercase. Multi-byte-edness makes no difference when | 56 // URL strings to lowercase. Multi-byte-edness makes no difference when |
| 56 // indexing or when searching the index as the final filtering of results | 57 // indexing or when searching the index as the final filtering of results |
| 57 // is dependent on the comparison of a string of bytes, not individual | 58 // is dependent on the comparison of a string of bytes, not individual |
| 58 // characters. While the lookup of those bytes during a search in the | 59 // characters. While the lookup of those bytes during a search in the |
| 59 // |char_word_map_| could serve up words in which the individual char16 | 60 // |char_word_map_| could serve up words in which the individual char16 |
| 60 // occurs as a portion of a composite character the next filtering step | 61 // occurs as a portion of a composite character the next filtering step |
| 61 // will eliminate such words except in the case where a single character | 62 // will eliminate such words except in the case where a single character |
| 62 // is being searched on and which character occurs as the second char16 of a | 63 // is being searched on and which character occurs as the second char16 of a |
| 63 // multi-char16 instance. | 64 // multi-char16 instance. |
| 64 class InMemoryURLIndex { | 65 class InMemoryURLIndex : public content::NotificationObserver { |
| 65 public: | 66 public: |
| 66 // |history_dir| is a path to the directory containing the history database | 67 // |history_dir| is a path to the directory containing the history database |
| 67 // within the profile wherein the cache and transaction journals will be | 68 // within the profile wherein the cache and transaction journals will be |
| 68 // stored. | 69 // stored. |
| 69 explicit InMemoryURLIndex(const FilePath& history_dir); | 70 explicit InMemoryURLIndex(Profile* profile, |
|
Peter Kasting
2011/11/21 19:07:50
Nit: "explicit" no longer needed
mrossetti
2011/11/21 21:53:25
Done.
| |
| 71 const FilePath& history_dir); | |
| 70 virtual ~InMemoryURLIndex(); | 72 virtual ~InMemoryURLIndex(); |
| 71 | 73 |
| 72 // Opens and indexes the URL history database. | 74 // Restores our index from its cache, if possible. If the cache is not |
| 75 // available then we will register for the NOTIFICATION_HISTORY_LOADED | |
| 76 // notifications and then rebuild the index from the history database. | |
| 73 // |languages| gives a list of language encodings with which the history | 77 // |languages| gives a list of language encodings with which the history |
| 74 // URLs and omnibox searches are interpreted, i.e. when each is broken | 78 // URLs and omnibox searches are interpreted, i.e. when each is broken |
| 75 // down into words and each word is broken down into characters. | 79 // down into words and each word is broken down into characters. |
| 76 bool Init(URLDatabase* history_db, const std::string& languages); | 80 void Init(const std::string& languages); |
| 77 | 81 |
| 78 // Reloads the history index. Attempts to reload from the cache unless | 82 // Reloads the history index from the history database given in |history_db|. |
| 79 // |clear_cache| is true. If the cache is unavailable then reload the | 83 void ReloadFromHistory(URLDatabase* history_db); |
| 80 // index from |history_db|. | |
| 81 bool ReloadFromHistory(URLDatabase* history_db, bool clear_cache); | |
| 82 | 84 |
| 83 // Signals that any outstanding initialization should be canceled and | 85 // Signals that any outstanding initialization should be canceled and |
| 84 // flushes the cache to disk. | 86 // flushes the cache to disk. |
| 85 void ShutDown(); | 87 void ShutDown(); |
| 86 | 88 |
| 87 // Restores the index's private data from the cache file stored in the | |
| 88 // profile directory and returns true if successful. | |
| 89 bool RestoreFromCacheFile(); | |
| 90 | |
| 91 // Caches the index private data and writes the cache file to the profile | 89 // Caches the index private data and writes the cache file to the profile |
| 92 // directory. | 90 // directory. |
| 93 bool SaveToCacheFile(); | 91 bool SaveToCacheFile(); |
| 94 | 92 |
| 95 // Given a vector containing one or more words as string16s, scans the | 93 // Given a vector containing one or more words as string16s, scans the |
| 96 // history index and return a vector with all scored, matching history items. | 94 // history index and return a vector with all scored, matching history items. |
| 97 // Each term must occur somewhere in the history item's URL or page title for | 95 // Each term must occur somewhere in the history item's URL or page title for |
| 98 // the item to qualify; however, the terms do not necessarily have to be | 96 // the item to qualify; however, the terms do not necessarily have to be |
| 99 // adjacent. Results are sorted with higher scoring items first. Each term | 97 // adjacent. Results are sorted with higher scoring items first. Each term |
| 100 // from |terms| may contain punctuation but should not contain spaces. | 98 // from |terms| may contain punctuation but should not contain spaces. |
| 101 // A search request which results in more than |kItemsToScoreLimit| total | 99 // A search request which results in more than |kItemsToScoreLimit| total |
| 102 // candidate items returns no matches (though the results set will be | 100 // candidate items returns no matches (though the results set will be |
| 103 // retained and used for subsequent calls to this function) as the scoring | 101 // retained and used for subsequent calls to this function) as the scoring |
| 104 // of such a large number of candidates may cause perceptible typing response | 102 // of such a large number of candidates may cause perceptible typing response |
| 105 // delays in the omnibox. This is likely to occur for short omnibox terms | 103 // delays in the omnibox. This is likely to occur for short omnibox terms |
| 106 // such as 'h' and 'w' which will be found in nearly all history candidates. | 104 // such as 'h' and 'w' which will be found in nearly all history candidates. |
| 107 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms); | 105 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms); |
| 108 | 106 |
| 109 // Updates or adds a history item to the index if it meets the minimum | 107 // Updates or adds a history item to the index if it meets the minimum |
| 110 // 'quick' criteria. | 108 // selection criteria. |
| 111 void UpdateURL(URLID row_id, const URLRow& row); | 109 void UpdateURL(const URLRow& row); |
| 112 | 110 |
| 113 // Deletes indexing data for a history item. The item may not have actually | 111 // Deletes indexing data for a history item. The item may not have actually |
| 114 // been indexed (which is the case if it did not previously meet minimum | 112 // been indexed (which is the case if it did not previously meet minimum |
| 115 // 'quick' criteria). | 113 // 'quick' criteria). |
| 116 void DeleteURL(URLID row_id); | 114 void DeleteURL(const URLRow& row); |
| 115 | |
| 116 // Notification callback. | |
| 117 virtual void Observe(int type, | |
| 118 const content::NotificationSource& source, | |
| 119 const content::NotificationDetails& details); | |
| 117 | 120 |
| 118 private: | 121 private: |
| 119 friend class AddHistoryMatch; | 122 friend class AddHistoryMatch; |
| 120 friend class InMemoryURLIndexTest; | 123 friend class InMemoryURLIndexTest; |
| 121 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); | 124 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); |
| 122 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); | 125 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); |
| 123 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); | 126 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
| 124 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); | 127 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); |
| 125 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); | 128 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); |
| 126 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); | 129 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 179 void operator()(const HistoryID history_id); | 182 void operator()(const HistoryID history_id); |
| 180 | 183 |
| 181 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } | 184 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } |
| 182 | 185 |
| 183 private: | 186 private: |
| 184 const InMemoryURLIndex& index_; | 187 const InMemoryURLIndex& index_; |
| 185 ScoredHistoryMatches scored_matches_; | 188 ScoredHistoryMatches scored_matches_; |
| 186 const String16Vector& lower_terms_; | 189 const String16Vector& lower_terms_; |
| 187 }; | 190 }; |
| 188 | 191 |
| 192 // Initialization and Restoration -------------------------------------------- | |
| 193 | |
| 194 // Restores the index's private data from the cache, if possible, otherwise | |
| 195 // register to be notified when the history database becomes available. | |
|
Peter Kasting
2011/11/21 19:07:50
Nit: register -> registers
mrossetti
2011/11/21 21:53:25
Done.
| |
| 196 void RestoreFromCache(); | |
| 197 | |
| 198 // Restores the index's private data from the cache file stored in the | |
| 199 // profile directory and returns true if successful. | |
| 200 bool RestoreFromCacheFile(); | |
| 201 | |
| 189 // Initializes all index data members in preparation for restoring the index | 202 // Initializes all index data members in preparation for restoring the index |
| 190 // from the cache or a complete rebuild from the history database. | 203 // from the cache or a complete rebuild from the history database. |
| 191 void ClearPrivateData(); | 204 void ClearPrivateData(); |
| 192 | 205 |
| 193 // Initializes the whitelist of URL schemes. | 206 // Initializes the whitelist of URL schemes. |
| 194 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); | 207 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); |
| 195 | 208 |
| 196 // URL History indexing support functions. | 209 // URL History Indexing ------------------------------------------------------ |
| 197 | 210 |
| 198 // Indexes one URL history item. | 211 // Indexes one URL history item. |
| 199 void IndexRow(const URLRow& row); | 212 void IndexRow(const URLRow& row); |
| 200 | 213 |
| 201 // Parses and indexes the words in the URL and page title of |row|. | 214 // Parses and indexes the words in the URL and page title of |row|. |
| 202 void AddRowWordsToIndex(const URLRow& row); | 215 void AddRowWordsToIndex(const URLRow& row); |
| 203 | 216 |
| 204 // Removes |row| and all associated words and characters from the index. | 217 // Removes |row| and all associated words and characters from the index. |
| 205 void RemoveRowFromIndex(const URLRow& row); | 218 void RemoveRowFromIndex(const URLRow& row); |
| 206 | 219 |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 246 | 259 |
| 247 // Calculates a component score based on position, ordering and total | 260 // Calculates a component score based on position, ordering and total |
| 248 // substring match size using metrics recorded in |matches|. |max_length| | 261 // substring match size using metrics recorded in |matches|. |max_length| |
| 249 // is the length of the string against which the terms are being searched. | 262 // is the length of the string against which the terms are being searched. |
| 250 static int ScoreComponentForMatches(const TermMatches& matches, | 263 static int ScoreComponentForMatches(const TermMatches& matches, |
| 251 size_t max_length); | 264 size_t max_length); |
| 252 | 265 |
| 253 // Determines if |gurl| has a whitelisted scheme and returns true if so. | 266 // Determines if |gurl| has a whitelisted scheme and returns true if so. |
| 254 bool URLSchemeIsWhitelisted(const GURL& gurl) const; | 267 bool URLSchemeIsWhitelisted(const GURL& gurl) const; |
| 255 | 268 |
| 269 // Notification handlers. | |
| 270 void OnURLVisited(const URLVisitedDetails* details); | |
| 271 void OnURLsModified(const URLsModifiedDetails* details); | |
| 272 void OnURLsDeleted(const URLsDeletedDetails* details); | |
| 273 | |
| 256 // Utility functions supporting RestoreFromCache and SaveToCache. | 274 // Utility functions supporting RestoreFromCache and SaveToCache. |
| 257 | 275 |
| 258 // Construct a file path for the cache file within the same directory where | 276 // Construct a file path for the cache file within the same directory where |
| 259 // the history database is kept and saves that path to |file_path|. Returns | 277 // the history database is kept and saves that path to |file_path|. Returns |
| 260 // true if |file_path| can be successfully constructed. (This function | 278 // true if |file_path| can be successfully constructed. (This function |
| 261 // provided as a hook for unit testing.) | 279 // provided as a hook for unit testing.) |
| 262 bool GetCacheFilePath(FilePath* file_path); | 280 bool GetCacheFilePath(FilePath* file_path); |
| 263 | 281 |
| 264 // Encode a data structure into the protobuf |cache|. | 282 // Encode a data structure into the protobuf |cache|. |
| 265 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; | 283 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; |
| 266 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; | 284 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; |
| 267 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 285 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 268 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 286 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 269 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; | 287 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 270 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; | 288 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 271 | 289 |
| 272 // Decode a data structure from the protobuf |cache|. Return false if there | 290 // Decode a data structure from the protobuf |cache|. Return false if there |
| 273 // is any kind of failure. | 291 // is any kind of failure. |
| 274 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); | 292 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); |
| 275 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); | 293 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); |
| 276 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 294 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 277 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 295 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 278 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); | 296 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 279 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); | 297 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 280 | 298 |
| 299 content::NotificationRegistrar registrar_; | |
| 300 | |
| 301 // The profile with which we are associated. | |
| 302 Profile* profile_; | |
| 303 | |
| 281 // Directory where cache file resides. This is, except when unit testing, | 304 // Directory where cache file resides. This is, except when unit testing, |
| 282 // the same directory in which the profile's history database is found. It | 305 // the same directory in which the profile's history database is found. It |
| 283 // should never be empty. | 306 // should never be empty. |
| 284 FilePath history_dir_; | 307 FilePath history_dir_; |
| 285 | 308 |
| 286 // The timestamp of when the cache was last saved. This is used to validate | 309 // The timestamp of when the cache was last saved. This is used to validate |
| 287 // the transaction journal's applicability to the cache. The timestamp is | 310 // the transaction journal's applicability to the cache. The timestamp is |
| 288 // initialized to the NULL time, indicating that the cache was not used when | 311 // initialized to the NULL time, indicating that the cache was not used when |
| 289 // the InMemoryURLIndex was last populated. | 312 // the InMemoryURLIndex was last populated. |
| 290 base::Time last_saved_; | 313 base::Time last_saved_; |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 307 // TODO(mrossetti): Eliminate once the transition to SQLite has been done. | 330 // TODO(mrossetti): Eliminate once the transition to SQLite has been done. |
| 308 // http://crbug.com/83659 | 331 // http://crbug.com/83659 |
| 309 bool cached_at_shutdown_; | 332 bool cached_at_shutdown_; |
| 310 | 333 |
| 311 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); | 334 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); |
| 312 }; | 335 }; |
| 313 | 336 |
| 314 } // namespace history | 337 } // namespace history |
| 315 | 338 |
| 316 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 339 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| OLD | NEW |