Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| 7 #pragma once | 7 #pragma once |
| 8 | 8 |
| 9 #include <functional> | 9 #include <functional> |
| 10 #include <map> | 10 #include <map> |
| 11 #include <set> | 11 #include <set> |
| 12 #include <string> | 12 #include <string> |
| 13 #include <vector> | 13 #include <vector> |
| 14 | 14 |
| 15 #include "base/basictypes.h" | 15 #include "base/basictypes.h" |
| 16 #include "base/file_path.h" | 16 #include "base/file_path.h" |
| 17 #include "base/gtest_prod_util.h" | 17 #include "base/gtest_prod_util.h" |
| 18 #include "base/memory/linked_ptr.h" | 18 #include "base/memory/linked_ptr.h" |
| 19 #include "base/memory/scoped_ptr.h" | 19 #include "base/memory/scoped_ptr.h" |
| 20 #include "base/string16.h" | 20 #include "base/string16.h" |
| 21 #include "chrome/browser/autocomplete/autocomplete_match.h" | 21 #include "chrome/browser/autocomplete/autocomplete_match.h" |
| 22 #include "chrome/browser/autocomplete/history_provider_util.h" | 22 #include "chrome/browser/autocomplete/history_provider_util.h" |
| 23 #include "chrome/browser/history/history_types.h" | 23 #include "chrome/browser/history/history_types.h" |
| 24 #include "chrome/browser/history/in_memory_url_index_types.h" | |
| 24 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" | 25 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" |
| 26 #include "content/common/notification_observer.h" | |
| 27 #include "content/common/notification_registrar.h" | |
| 25 #include "sql/connection.h" | 28 #include "sql/connection.h" |
| 26 #include "testing/gtest/include/gtest/gtest_prod.h" | 29 #include "testing/gtest/include/gtest/gtest_prod.h" |
| 27 | 30 |
| 28 class Profile; | 31 class Profile; |
| 29 | 32 |
| 30 namespace base { | |
| 31 class Time; | |
| 32 } | |
| 33 | |
| 34 namespace in_memory_url_index { | 33 namespace in_memory_url_index { |
| 35 class InMemoryURLIndexCacheItem; | 34 class InMemoryURLIndexCacheItem; |
| 36 } | 35 } |
| 37 | 36 |
| 38 namespace history { | 37 namespace history { |
| 39 | 38 |
| 40 namespace imui = in_memory_url_index; | 39 namespace imui = in_memory_url_index; |
| 41 | 40 |
| 42 class URLDatabase; | 41 class URLDatabase; |
| 43 | 42 struct URLsDeletedDetails; |
| 44 // Specifies where an omnibox term occurs within a string. Used for specifying | 43 struct URLsModifiedDetails; |
| 45 // highlights in AutocompleteMatches (ACMatchClassifications) and to assist in | 44 struct URLVisitedDetails; |
| 46 // scoring a result. | |
| 47 struct TermMatch { | |
| 48 TermMatch(int term_num, size_t offset, size_t length) | |
| 49 : term_num(term_num), | |
| 50 offset(offset), | |
| 51 length(length) {} | |
| 52 | |
| 53 int term_num; // The index of the term in the original search string. | |
| 54 size_t offset; // The starting offset of the substring match. | |
| 55 size_t length; // The length of the substring match. | |
| 56 }; | |
| 57 typedef std::vector<TermMatch> TermMatches; | |
| 58 | |
| 59 // Used for intermediate history result operations. | |
| 60 struct ScoredHistoryMatch : public HistoryMatch { | |
| 61 ScoredHistoryMatch(); // Required by STL. | |
| 62 explicit ScoredHistoryMatch(const URLRow& url_info); | |
| 63 ~ScoredHistoryMatch(); | |
| 64 | |
| 65 static bool MatchScoreGreater(const ScoredHistoryMatch& m1, | |
| 66 const ScoredHistoryMatch& m2); | |
| 67 | |
| 68 // An interim score taking into consideration location and completeness | |
| 69 // of the match. | |
| 70 int raw_score; | |
| 71 TermMatches url_matches; // Term matches within the URL. | |
| 72 TermMatches title_matches; // Term matches within the page title. | |
| 73 bool can_inline; // True if this is a candidate for in-line autocompletion. | |
| 74 }; | |
| 75 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; | |
| 76 | 45 |
| 77 // The URL history source. | 46 // The URL history source. |
| 78 // Holds portions of the URL database in memory in an indexed form. Used to | 47 // Holds portions of the URL database in memory in an indexed form. Used to |
| 79 // quickly look up matching URLs for a given query string. Used by | 48 // quickly look up matching URLs for a given query string. Used by |
| 80 // the HistoryURLProvider for inline autocomplete and to provide URL | 49 // the HistoryURLProvider for inline autocomplete and to provide URL |
| 81 // matches to the omnibox. | 50 // matches to the omnibox. |
| 82 // | 51 // |
| 83 // Note about multi-byte codepoints and the data structures in the | 52 // Note about multi-byte codepoints and the data structures in the |
| 84 // InMemoryURLIndex class: One will quickly notice that no effort is made to | 53 // InMemoryURLIndex class: One will quickly notice that no effort is made to |
| 85 // ensure that multi-byte character boundaries are detected when indexing the | 54 // ensure that multi-byte character boundaries are detected when indexing the |
| 86 // words and characters in the URL history database except when converting | 55 // words and characters in the URL history database except when converting |
| 87 // URL strings to lowercase. Multi-byte-edness makes no difference when | 56 // URL strings to lowercase. Multi-byte-edness makes no difference when |
| 88 // indexing or when searching the index as the final filtering of results | 57 // indexing or when searching the index as the final filtering of results |
| 89 // is dependent on the comparison of a string of bytes, not individual | 58 // is dependent on the comparison of a string of bytes, not individual |
| 90 // characters. While the lookup of those bytes during a search in the | 59 // characters. While the lookup of those bytes during a search in the |
| 91 // |char_word_map_| could serve up words in which the individual char16 | 60 // |char_word_map_| could serve up words in which the individual char16 |
| 92 // occurs as a portion of a composite character the next filtering step | 61 // occurs as a portion of a composite character the next filtering step |
| 93 // will eliminate such words except in the case where a single character | 62 // will eliminate such words except in the case where a single character |
| 94 // is being searched on and which character occurs as the second char16 of a | 63 // is being searched on and which character occurs as the second char16 of a |
| 95 // multi-char16 instance. | 64 // multi-char16 instance. |
| 96 class InMemoryURLIndex { | 65 class InMemoryURLIndex : public NotificationObserver { |
| 97 public: | 66 public: |
| 98 // |history_dir| is a path to the directory containing the history database | 67 // |history_dir| is a path to the directory containing the history database |
| 99 // within the profile wherein the cache and transaction journals will be | 68 // within the profile wherein the cache and transaction journals will be |
| 100 // stored. | 69 // stored. |
| 101 explicit InMemoryURLIndex(const FilePath& history_dir); | 70 explicit InMemoryURLIndex(Profile* profile, |
| 102 ~InMemoryURLIndex(); | 71 const FilePath& history_dir); |
| 103 | 72 virtual ~InMemoryURLIndex(); |
| 104 // Convenience types | |
| 105 typedef std::vector<string16> String16Vector; | |
| 106 | 73 |
| 107 // Opens and indexes the URL history database. | 74 // Opens and indexes the URL history database. |
| 108 // |languages| gives a list of language encodings with which the history | 75 // |languages| gives a list of language encodings with which the history |
| 109 // URLs and omnibox searches are interpreted, i.e. when each is broken | 76 // URLs and omnibox searches are interpreted, i.e. when each is broken |
| 110 // down into words and each word is broken down into characters. | 77 // down into words and each word is broken down into characters. |
| 111 bool Init(URLDatabase* history_db, const std::string& languages); | 78 bool Init(URLDatabase* history_db, const std::string& languages); |
| 112 | 79 |
| 113 // Reloads the history index. Attempts to reload from the cache unless | 80 // Reloads the history index. Attempts to reload from the cache unless |
| 114 // |clear_cache| is true. If the cache is unavailable then reload the | 81 // |clear_cache| is true. If the cache is unavailable then reload the |
| 115 // index from |history_db|. | 82 // index from |history_db|. |
| (...skipping 20 matching lines...) Expand all Loading... | |
| 136 // A search request which results in more than |kItemsToScoreLimit| total | 103 // A search request which results in more than |kItemsToScoreLimit| total |
| 137 // candidate items returns no matches (though the results set will be | 104 // candidate items returns no matches (though the results set will be |
| 138 // retained and used for subsequent calls to this function) as the scoring | 105 // retained and used for subsequent calls to this function) as the scoring |
| 139 // of such a large number of candidates may cause perceptible typing response | 106 // of such a large number of candidates may cause perceptible typing response |
| 140 // delays in the omnibox. This is likely to occur for short omnibox terms | 107 // delays in the omnibox. This is likely to occur for short omnibox terms |
| 141 // such as 'h' and 'w' which will be found in nearly all history candidates. | 108 // such as 'h' and 'w' which will be found in nearly all history candidates. |
| 142 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms); | 109 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms); |
| 143 | 110 |
| 144 // Updates or adds a history item to the index if it meets the minimum | 111 // Updates or adds a history item to the index if it meets the minimum |
| 145 // 'quick' criteria. | 112 // 'quick' criteria. |
| 146 void UpdateURL(URLID row_id, const URLRow& row); | 113 void UpdateURL(const URLRow& row); |
| 147 | 114 |
| 148 // Deletes indexing data for a history item. The item may not have actually | 115 // Deletes indexing data for a history item. The item may not have actually |
| 149 // been indexed (which is the case if it did not previously meet minimum | 116 // been indexed (which is the case if it did not previously meet minimum |
| 150 // 'quick' criteria). | 117 // 'quick' criteria). |
| 151 void DeleteURL(URLID row_id); | 118 void DeleteURL(const URLRow& row); |
| 152 | 119 |
| 153 // Breaks the |uni_string| string down into individual words and return | 120 // Notification callback. |
| 154 // a vector with the individual words in their original order. If | 121 virtual void Observe(int type, |
| 155 // |break_on_space| is false then the resulting list will contain only words | 122 const NotificationSource& source, |
| 156 // containing alpha-numeric characters. If |break_on_space| is true then the | 123 const NotificationDetails& details); |
| 157 // resulting list will contain strings broken at whitespace. | |
| 158 // | |
| 159 // Example: | |
| 160 // Given: |uni_string|: "http://www.google.com/ harry the rabbit." | |
| 161 // With |break_on_space| false the returned list will contain: | |
| 162 // "http", "www", "google", "com", "harry", "the", "rabbit" | |
| 163 // With |break_on_space| true the returned list will contain: | |
| 164 // "http://", "www.google.com/", "harry", "the", "rabbit." | |
| 165 static String16Vector WordVectorFromString16(const string16& uni_string, | |
| 166 bool break_on_space); | |
| 167 | |
| 168 // Extract and return the offsets from |matches|. | |
| 169 static std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches); | |
| 170 | |
| 171 // Replace the offsets in |matches| with those given in |offsets|, deleting | |
| 172 // any which are npos, and return the updated list of matches. | |
| 173 static TermMatches ReplaceOffsetsInTermMatches( | |
| 174 const TermMatches& matches, | |
| 175 const std::vector<size_t>& offsets); | |
| 176 | 124 |
| 177 private: | 125 private: |
| 178 friend class AddHistoryMatch; | 126 friend class AddHistoryMatch; |
| 179 friend class InMemoryURLIndexTest; | 127 friend class InMemoryURLIndexTest; |
| 180 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); | 128 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); |
| 181 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); | 129 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); |
| 182 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); | 130 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
| 183 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); | 131 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); |
| 184 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); | 132 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); |
| 185 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); | 133 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); |
| 186 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions); | 134 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions); |
| 187 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); | 135 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
| 188 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); | 136 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); |
| 189 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); | 137 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); |
| 190 | 138 |
| 191 // Signals that there are no previously cached results for the typed term. | 139 // Signals that there are no previously cached results for the typed term. |
| 192 static const size_t kNoCachedResultForTerm; | 140 static const size_t kNoCachedResultForTerm; |
| 193 | 141 |
| 194 // Creating one of me without a history path is not allowed (tests excepted). | 142 // Creating one of me without a history path is not allowed (tests excepted). |
| 195 InMemoryURLIndex(); | 143 InMemoryURLIndex(); |
| 196 | 144 |
| 197 // Convenience types. | |
| 198 typedef std::set<string16> String16Set; | |
| 199 typedef std::set<char16> Char16Set; | |
| 200 typedef std::vector<char16> Char16Vector; | |
| 201 | |
| 202 // An index into list of all of the words we have indexed. | |
| 203 typedef int WordID; | |
| 204 | |
| 205 // A map allowing a WordID to be determined given a word. | |
| 206 typedef std::map<string16, WordID> WordMap; | |
| 207 | |
| 208 // A map from character to word_ids. | |
| 209 typedef std::set<WordID> WordIDSet; // An index into the WordList. | |
| 210 typedef std::map<char16, WordIDSet> CharWordIDMap; | |
| 211 | |
| 212 // A map from word_id to history item. | |
| 213 // TODO(mrossetti): URLID is 64 bit: a memory bloat and performance hit. | |
| 214 // Consider using a smaller type. | |
| 215 typedef URLID HistoryID; | |
| 216 typedef std::set<HistoryID> HistoryIDSet; | |
| 217 typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap; | |
| 218 | |
| 219 | |
| 220 // Support caching of term results so that we can optimize searches which | 145 // Support caching of term results so that we can optimize searches which |
| 221 // build upon a previous search. Each entry in this map represents one | 146 // build upon a previous search. Each entry in this map represents one |
| 222 // search term from the most recent search. For example, if the user had | 147 // search term from the most recent search. For example, if the user had |
| 223 // typed "google blog trans" and then typed an additional 'l' (at the end, | 148 // typed "google blog trans" and then typed an additional 'l' (at the end, |
| 224 // of course) then there would be four items in the cache: 'blog', 'google', | 149 // of course) then there would be four items in the cache: 'blog', 'google', |
| 225 // 'trans', and 'transl'. All would be marked as being in use except for the | 150 // 'trans', and 'transl'. All would be marked as being in use except for the |
| 226 // 'trans' item; its cached data would have been used when optimizing the | 151 // 'trans' item; its cached data would have been used when optimizing the |
| 227 // construction of the search results candidates for 'transl' but then would | 152 // construction of the search results candidates for 'transl' but then would |
| 228 // no longer be needed. | 153 // no longer be needed. |
| 229 // | 154 // |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 241 SearchTermCacheItem(); | 166 SearchTermCacheItem(); |
| 242 | 167 |
| 243 ~SearchTermCacheItem(); | 168 ~SearchTermCacheItem(); |
| 244 | 169 |
| 245 WordIDSet word_id_set_; | 170 WordIDSet word_id_set_; |
| 246 HistoryIDSet history_id_set_; | 171 HistoryIDSet history_id_set_; |
| 247 bool used_; // True if this item has been used for the current term search. | 172 bool used_; // True if this item has been used for the current term search. |
| 248 }; | 173 }; |
| 249 typedef std::map<string16, SearchTermCacheItem> SearchTermCacheMap; | 174 typedef std::map<string16, SearchTermCacheItem> SearchTermCacheMap; |
| 250 | 175 |
| 251 // TODO(rohitrao): Probably replace this with QueryResults. | |
| 252 typedef std::vector<URLRow> URLRowVector; | |
| 253 | |
| 254 // A map from history_id to the history's URL and title. | |
| 255 typedef std::map<HistoryID, URLRow> HistoryInfoMap; | |
| 256 | |
| 257 // A helper class which performs the final filter on each candidate | 176 // A helper class which performs the final filter on each candidate |
| 258 // history URL match, inserting accepted matches into |scored_matches_| | 177 // history URL match, inserting accepted matches into |scored_matches_| |
| 259 // and trimming the maximum number of matches to 10. | 178 // and trimming the maximum number of matches to 10. |
| 260 class AddHistoryMatch : public std::unary_function<HistoryID, void> { | 179 class AddHistoryMatch : public std::unary_function<HistoryID, void> { |
| 261 public: | 180 public: |
| 262 AddHistoryMatch(const InMemoryURLIndex& index, | 181 AddHistoryMatch(const InMemoryURLIndex& index, |
| 263 const String16Vector& lower_terms); | 182 const String16Vector& lower_terms); |
| 264 ~AddHistoryMatch(); | 183 ~AddHistoryMatch(); |
| 265 | 184 |
| 266 void operator()(const HistoryID history_id); | 185 void operator()(const HistoryID history_id); |
| 267 | 186 |
| 268 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } | 187 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } |
| 269 | 188 |
| 270 private: | 189 private: |
| 271 const InMemoryURLIndex& index_; | 190 const InMemoryURLIndex& index_; |
| 272 ScoredHistoryMatches scored_matches_; | 191 ScoredHistoryMatches scored_matches_; |
| 273 const String16Vector& lower_terms_; | 192 const String16Vector& lower_terms_; |
| 274 }; | 193 }; |
| 275 | 194 |
| 276 // Initializes all index data members in preparation for restoring the index | 195 // Initializes all index data members in preparation for restoring the index |
| 277 // from the cache or a complete rebuild from the history database. | 196 // from the cache or a complete rebuild from the history database. |
| 278 void ClearPrivateData(); | 197 void ClearPrivateData(); |
| 279 | 198 |
| 280 // Initializes the whitelist of URL schemes. | 199 // Initializes the whitelist of URL schemes. |
| 281 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); | 200 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); |
| 282 | 201 |
| 283 // Breaks a string down into individual words. | |
| 284 static String16Set WordSetFromString16(const string16& uni_string); | |
| 285 | |
| 286 // Given a set of Char16s, finds words containing those characters. | |
| 287 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); | |
| 288 | |
| 289 // Creates a TermMatches which has an entry for each occurrence of the string | |
| 290 // |term| found in the string |string|. Mark each match with |term_num| so | |
| 291 // that the resulting TermMatches can be merged with other TermMatches for | |
| 292 // other terms. | |
| 293 static TermMatches MatchTermInString(const string16& term, | |
| 294 const string16& string, | |
| 295 int term_num); | |
| 296 | |
| 297 // URL History indexing support functions. | 202 // URL History indexing support functions. |
| 298 | 203 |
| 299 // Indexes one URL history item. | 204 // Indexes one URL history item. |
| 300 bool IndexRow(const URLRow& row); | 205 bool IndexRow(const URLRow& row); |
| 301 | 206 |
| 302 // Breaks the |uni_word| string down into its individual characters. | 207 // Parses and indexes the words in the URL and page title of |row|. |
| 303 // Note that this is temporarily intended to work on a single word, but | 208 void AddRowWordsToIndex(const URLRow& row); |
| 304 // _will_ work on a string of words, perhaps with unexpected results. | 209 |
| 305 // TODO(mrossetti): Lots of optimizations possible here for not restarting | 210 // Removes |row| and all associated words and characters from the index. |
| 306 // a search if the user is just typing along. Also, change this to uniString | 211 void RemoveRowFromIndex(const URLRow& row); |
| 307 // and properly handle substring matches, scoring and sorting the results | 212 |
| 308 // by score. Also, provide the metrics for where the matches occur so that | 213 // Removes all words and characters associated with |row| from the index. |
| 309 // the UI can highlight the matched sections. | 214 void RemoveRowWordsFromIndex(const URLRow& row); |
| 310 static Char16Set Char16SetFromString16(const string16& uni_word); | |
| 311 | 215 |
| 312 // Given a single word in |uni_word|, adds a reference for the containing | 216 // Given a single word in |uni_word|, adds a reference for the containing |
| 313 // history item identified by |history_id| to the index. | 217 // history item identified by |history_id| to the index. |
| 314 void AddWordToIndex(const string16& uni_word, HistoryID history_id); | 218 void AddWordToIndex(const string16& uni_word, HistoryID history_id); |
| 315 | 219 |
| 316 // Updates an existing entry in the word/history index by adding the | 220 // Updates an existing entry in the word/history index by adding the |
| 317 // |history_id| to set for |word_id| in the word_id_history_map_. | 221 // |history_id| to set for |word_id| in the word_id_history_map_. |
| 318 void UpdateWordHistory(WordID word_id, HistoryID history_id); | 222 void UpdateWordHistory(WordID word_id, HistoryID history_id); |
| 319 | 223 |
| 320 // Creates a new entry in the word/history map for |word_id| and add | 224 // Creates a new entry in the word/history map for |word_id| and add |
| (...skipping 24 matching lines...) Expand all Loading... | |
| 345 static ScoredHistoryMatch ScoredMatchForURL( | 249 static ScoredHistoryMatch ScoredMatchForURL( |
| 346 const URLRow& row, | 250 const URLRow& row, |
| 347 const String16Vector& terms_vector); | 251 const String16Vector& terms_vector); |
| 348 | 252 |
| 349 // Calculates a component score based on position, ordering and total | 253 // Calculates a component score based on position, ordering and total |
| 350 // substring match size using metrics recorded in |matches|. |max_length| | 254 // substring match size using metrics recorded in |matches|. |max_length| |
| 351 // is the length of the string against which the terms are being searched. | 255 // is the length of the string against which the terms are being searched. |
| 352 static int ScoreComponentForMatches(const TermMatches& matches, | 256 static int ScoreComponentForMatches(const TermMatches& matches, |
| 353 size_t max_length); | 257 size_t max_length); |
| 354 | 258 |
| 355 // Sorts and removes overlapping substring matches from |matches| and | |
| 356 // returns the cleaned up matches. | |
| 357 static TermMatches SortAndDeoverlap(const TermMatches& matches); | |
| 358 | |
| 359 // Determines if |gurl| has a whitelisted scheme and returns true if so. | 259 // Determines if |gurl| has a whitelisted scheme and returns true if so. |
| 360 bool URLSchemeIsWhitelisted(const GURL& gurl) const; | 260 bool URLSchemeIsWhitelisted(const GURL& gurl) const; |
| 361 | 261 |
| 262 // Notification handlers. | |
| 263 void OnURLsVisited(const URLVisitedDetails& details); | |
|
Peter Kasting
2011/10/05 00:11:42
Nit: Should be singular?
mrossetti
2011/10/07 17:04:14
Yep, changed.
On 2011/10/05 00:11:42, Peter Kasti
| |
| 264 void OnURLsModified(const URLsModifiedDetails& details); | |
| 265 void OnURLsDeleted(const URLsDeletedDetails& details); | |
| 266 | |
| 362 // Utility functions supporting RestoreFromCache and SaveToCache. | 267 // Utility functions supporting RestoreFromCache and SaveToCache. |
| 363 | 268 |
| 364 // Construct a file path for the cache file within the same directory where | 269 // Construct a file path for the cache file within the same directory where |
| 365 // the history database is kept and saves that path to |file_path|. Returns | 270 // the history database is kept and saves that path to |file_path|. Returns |
| 366 // true if |file_path| can be successfully constructed. (This function | 271 // true if |file_path| can be successfully constructed. (This function |
| 367 // is provided as a hook for unit testing.) | 272 // is provided as a hook for unit testing.) |
| 368 bool GetCacheFilePath(FilePath* file_path); | 273 bool GetCacheFilePath(FilePath* file_path); |
| 369 | 274 |
| 370 // Encode a data structure into the protobuf |cache|. | 275 // Encode a data structure into the protobuf |cache|. |
| 371 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; | 276 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; |
| 372 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; | 277 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; |
| 373 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 278 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 374 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 279 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 375 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; | 280 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 376 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; | 281 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 377 | 282 |
| 378 // Decode a data structure from the protobuf |cache|. Return false if there | 283 // Decode a data structure from the protobuf |cache|. Return false if there |
| 379 // is any kind of failure. | 284 // is any kind of failure. |
| 380 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); | 285 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); |
| 381 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); | 286 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); |
| 382 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 287 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 383 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 288 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 384 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); | 289 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 385 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); | 290 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 386 | 291 |
| 292 Profile* profile_; // The profile may be NULL during unittests. | |
| 293 NotificationRegistrar registrar_; | |
| 294 | |
| 387 // Directory where cache file resides. This is, except when unit testing, | 295 // Directory where cache file resides. This is, except when unit testing, |
| 388 // the same directory in which the profile's history database is found. It | 296 // the same directory in which the profile's history database is found. It |
| 389 // should never be empty. | 297 // should never be empty. |
| 390 FilePath history_dir_; | 298 FilePath history_dir_; |
| 391 | 299 |
| 392 // The timestamp of when the cache was last saved. This is used to validate | 300 // The timestamp of when the cache was last saved. This is used to validate |
| 393 // the transaction journal's applicability to the cache. The timestamp is | 301 // the transaction journal's applicability to the cache. The timestamp is |
| 394 // initialized to the NULL time, indicating that the cache was not used when | 302 // initialized to the NULL time, indicating that the cache was not used when |
| 395 // the InMemoryURLIndex was last populated. | 303 // the InMemoryURLIndex was last populated. |
| 396 base::Time last_saved_; | 304 base::Time last_saved_; |
| 397 | 305 |
| 398 // A list of all of indexed words. The index of a word in this list is the | 306 // The index's durable private data. |
| 399 // ID of the word in the word_map_. It reduces the memory overhead by | 307 scoped_ptr<URLIndexPrivateData> private_data_; |
| 400 // replacing a potentially long and repeated string with a simple index. | |
| 401 // NOTE: A word will _never_ be removed from this vector thus insuring | |
| 402 // NOTE: A word will _never_ be removed from this vector thus ensuring | |
| 403 // maintenance complexity. | |
| 404 // TODO(mrossetti): Profile the vector allocation and determine if judicious | |
| 405 // 'reserve' calls are called for. | |
| 406 String16Vector word_list_; | |
| 407 | |
| 408 int history_item_count_; | |
| 409 WordMap word_map_; | |
| 410 CharWordIDMap char_word_map_; | |
| 411 WordIDHistoryMap word_id_history_map_; | |
| 412 HistoryInfoMap history_info_map_; | |
| 413 | 308 |
| 414 // Cache of search terms. | 309 // Cache of search terms. |
| 415 SearchTermCacheMap search_term_cache_; | 310 SearchTermCacheMap search_term_cache_; |
| 416 | 311 |
| 417 // Languages used during the word-breaking process during indexing. | 312 // Languages used during the word-breaking process during indexing. |
| 418 std::string languages_; | 313 std::string languages_; |
| 419 | 314 |
| 420 // Only URLs with a whitelisted scheme are indexed. | 315 // Only URLs with a whitelisted scheme are indexed. |
| 421 std::set<std::string> scheme_whitelist_; | 316 std::set<std::string> scheme_whitelist_; |
| 422 | 317 |
| 423 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); | 318 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); |
| 424 }; | 319 }; |
| 425 | 320 |
| 426 } // namespace history | 321 } // namespace history |
| 427 | 322 |
| 428 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 323 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| OLD | NEW |