OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
7 #pragma once | 7 #pragma once |
8 | 8 |
9 #include "base/file_path.h" | 9 #include "base/file_path.h" |
10 #include "base/gtest_prod_util.h" | 10 #include "base/gtest_prod_util.h" |
11 #include "chrome/browser/history/in_memory_url_index_types.h" | 11 #include "chrome/browser/history/in_memory_url_index_types.h" |
12 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" | 12 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" |
13 | 13 |
14 class InMemoryURLIndexTest; | |
15 | |
14 namespace in_memory_url_index { | 16 namespace in_memory_url_index { |
15 class InMemoryURLIndexCacheItem; | 17 class InMemoryURLIndexCacheItem; |
16 } | 18 } |
17 | 19 |
18 namespace history { | 20 namespace history { |
19 | 21 |
20 namespace imui = in_memory_url_index; | 22 namespace imui = in_memory_url_index; |
21 | 23 |
22 // A structure describing the InMemoryURLIndex's internal data and providing for | 24 // A structure describing the InMemoryURLIndex's internal data and providing for |
23 // restoring, rebuilding and updating that internal data. | 25 // restoring, rebuilding and updating that internal data. |
24 class URLIndexPrivateData { | 26 class URLIndexPrivateData { |
25 public: | 27 public: |
26 URLIndexPrivateData(); | 28 URLIndexPrivateData(); |
27 ~URLIndexPrivateData(); | 29 ~URLIndexPrivateData(); |
28 | 30 |
29 private: | 31 private: |
32 friend class AddHistoryMatch; | |
33 friend class HistoryQuickProviderTest; | |
30 friend class InMemoryURLIndex; | 34 friend class InMemoryURLIndex; |
31 friend class AddHistoryMatch; | |
32 friend class InMemoryURLIndexTest; | 35 friend class InMemoryURLIndexTest; |
33 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); | 36 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
34 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); | 37 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); |
35 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); | 38 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); |
36 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); | 39 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
37 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); | 40 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); |
38 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); | 41 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); |
39 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); | 42 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); |
40 | 43 |
44 #if 1 | |
45 void DUMP() const; | |
46 #endif | |
Peter Kasting
2012/01/14 00:12:49
This seems like it should have disappeared before
mrossetti
2012/03/03 05:05:56
Done.
| |
47 | |
41 // Support caching of term results so that we can optimize searches which | 48 // Support caching of term results so that we can optimize searches which |
42 // build upon a previous search. Each entry in this map represents one | 49 // build upon a previous search. Each entry in this map represents one |
43 // search term from the most recent search. For example, if the user had | 50 // search term from the most recent search. For example, if the user had |
44 // typed "google blog trans" and then typed an additional 'l' (at the end, | 51 // typed "google blog trans" and then typed an additional 'l' (at the end, |
45 // of course) then there would be four items in the cache: 'blog', 'google', | 52 // of course) then there would be four items in the cache: 'blog', 'google', |
46 // 'trans', and 'transl'. All would be marked as being in use except for the | 53 // 'trans', and 'transl'. All would be marked as being in use except for the |
47 // 'trans' item; its cached data would have been used when optimizing the | 54 // 'trans' item; its cached data would have been used when optimizing the |
48 // construction of the search results candidates for 'transl' but then would | 55 // construction of the search results candidates for 'transl' but then would |
49 // no longer be needed. | 56 // no longer be needed. |
50 // | 57 // |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
117 // descending score. The full results set (i.e. beyond the | 124 // descending score. The full results set (i.e. beyond the |
118 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls | 125 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls |
119 // to this function. | 126 // to this function. |
120 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); | 127 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); |
121 | 128 |
122 // Sets the |languages| to a list of language encodings with which the history | 129 // Sets the |languages| to a list of language encodings with which the history |
123 // URLs and omnibox searches are interpreted, i.e. how each is broken | 130 // URLs and omnibox searches are interpreted, i.e. how each is broken |
124 // down into words and each word is broken down into characters. | 131 // down into words and each word is broken down into characters. |
125 void set_languages(const std::string& languages) { languages_ = languages; } | 132 void set_languages(const std::string& languages) { languages_ = languages; } |
126 | 133 |
127 // Restores the index's private data from the cache file stored in the | 134 // Constructs a new object by restoring its contents from the file at |path|. |
128 // profile directory and returns true if successful. | 135 // Returns the new URLIndexPrivateData which on success will contain the |
129 bool RestoreFromFile(const FilePath& file_path); | 136 // restored data but upon failure will be empty. |
137 static URLIndexPrivateData* RestoreFromFile(const FilePath& path); | |
138 | |
139 // Constructs a new object by rebuilding its contents from the history | |
140 // database in |history_db|. Returns the new URLIndexPrivateData which on | |
141 // success will contain the rebuilt data but upon failure will be empty. | |
142 static URLIndexPrivateData* RebuildFromHistory(URLDatabase* history_db); | |
130 | 143 |
131 // Caches the index private data and writes the cache file to the profile | 144 // Caches the index private data and writes the cache file to the profile |
132 // directory. | 145 // directory. |
133 bool SaveToFile(const FilePath& file_path); | 146 bool SaveToFile(const FilePath& file_path); |
134 | 147 |
135 // Reloads the history index from |history_db|. | |
136 bool ReloadFromHistory(URLDatabase* history_db); | |
137 | |
138 // Initializes all index data members in preparation for restoring the index | 148 // Initializes all index data members in preparation for restoring the index |
139 // from the cache or a complete rebuild from the history database. | 149 // from the cache or a complete rebuild from the history database. |
140 void Clear(); | 150 void Clear(); |
141 | 151 |
142 // Adds |word_id| to |history_id|'s entry in the history/word map, | 152 // Adds |word_id| to |history_id|'s entry in the history/word map, |
143 // creating a new entry if one does not already exist. | 153 // creating a new entry if one does not already exist. |
144 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); | 154 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); |
145 | 155 |
146 // Given a set of Char16s, finds words containing those characters. | 156 // Given a set of Char16s, finds words containing those characters. |
147 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); | 157 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
148 | 158 |
149 // Initializes the whitelist of URL schemes. | 159 // Initializes the whitelist of URL schemes. |
150 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); | 160 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); |
151 | 161 |
152 // URL History indexing support functions. | 162 // URL History indexing support functions. |
153 | 163 |
154 // Indexes one URL history item. | 164 // Indexes one URL history item. |
155 void IndexRow(const URLRow& row); | 165 void IndexRow(const URLRow& row); |
156 | 166 |
157 // Updates or adds an history item to the index if it meets the minimum | 167 // Updates or adds the history item in |row| to the index if it meets the |
158 // 'quick' criteria. | 168 // minimum 'quick' criteria. |
159 void UpdateURL(URLID row_id, const URLRow& row); | 169 void UpdateURL(const URLRow& row); |
160 | 170 |
161 // Deletes indexing data for an history item. The item may not have actually | 171 // Deletes indexing data for the history item with the URL given in |url|. |
162 // been indexed (which is the case if it did not previously meet minimum | 172 // The item may not have actually been indexed, which is the case if it did |
163 // 'quick' criteria). | 173 // not previously meet minimum 'quick' criteria. |
164 void DeleteURL(URLID row_id); | 174 void DeleteURL(const GURL& url); |
165 | 175 |
166 // Parses and indexes the words in the URL and page title of |row|. | 176 // Parses and indexes the words in the URL and page title of |row|. |
167 void AddRowWordsToIndex(const URLRow& row); | 177 void AddRowWordsToIndex(const URLRow& row); |
168 | 178 |
169 // Removes |row| and all associated words and characters from the index. | 179 // Removes |row| and all associated words and characters from the index. |
170 void RemoveRowFromIndex(const URLRow& row); | 180 void RemoveRowFromIndex(const URLRow& row); |
171 | 181 |
172 // Removes all words and characters associated with |row| from the index. | 182 // Removes all words and characters associated with |row| from the index. |
173 void RemoveRowWordsFromIndex(const URLRow& row); | 183 void RemoveRowWordsFromIndex(const URLRow& row); |
174 | 184 |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
208 static ScoredHistoryMatch ScoredMatchForURL( | 218 static ScoredHistoryMatch ScoredMatchForURL( |
209 const URLRow& row, | 219 const URLRow& row, |
210 const String16Vector& terms_vector); | 220 const String16Vector& terms_vector); |
211 | 221 |
212 // Calculates a component score based on position, ordering and total | 222 // Calculates a component score based on position, ordering and total |
213 // substring match size using metrics recorded in |matches|. |max_length| | 223 // substring match size using metrics recorded in |matches|. |max_length| |
214 // is the length of the string against which the terms are being searched. | 224 // is the length of the string against which the terms are being searched. |
215 static int ScoreComponentForMatches(const TermMatches& matches, | 225 static int ScoreComponentForMatches(const TermMatches& matches, |
216 size_t max_length); | 226 size_t max_length); |
217 | 227 |
218 // Determines if |gurl| has a whitelisted scheme and returns true if so. | |
219 bool URLSchemeIsWhitelisted(const GURL& gurl) const; | |
220 | |
221 // Encode a data structure into the protobuf |cache|. | 228 // Encode a data structure into the protobuf |cache|. |
222 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; | 229 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; |
223 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; | 230 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; |
224 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 231 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
225 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 232 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
226 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; | 233 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; |
227 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; | 234 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; |
228 | 235 |
229 // Decode a data structure from the protobuf |cache|. Return false if there | 236 // Decode a data structure from the protobuf |cache|. Return false if there |
230 // is any kind of failure. | 237 // is any kind of failure. |
231 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); | 238 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); |
232 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); | 239 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); |
233 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 240 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
234 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 241 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
235 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); | 242 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); |
236 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); | 243 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); |
237 | 244 |
245 // Determines if |gurl| has a whitelisted scheme and returns true if so. | |
246 bool URLSchemeIsWhitelisted(const GURL& gurl) const; | |
247 | |
238 // Cache of search terms. | 248 // Cache of search terms. |
239 SearchTermCacheMap search_term_cache_; | 249 SearchTermCacheMap search_term_cache_; |
240 | 250 |
241 // Languages used during the word-breaking process during indexing. | 251 // Languages used during the word-breaking process during indexing. |
242 std::string languages_; | 252 std::string languages_; |
243 | 253 |
244 // Only URLs with a whitelisted scheme are indexed. | 254 // Only URLs with a whitelisted scheme are indexed. |
245 std::set<std::string> scheme_whitelist_; | 255 std::set<std::string> scheme_whitelist_; |
246 | 256 |
247 // Start of data members that are cached ------------------------------------- | 257 // Start of data members that are cached ------------------------------------- |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
287 // Used for unit testing only. Records the number of candidate history items | 297 // Used for unit testing only. Records the number of candidate history items |
288 // at three stages in the index searching process. | 298 // at three stages in the index searching process. |
289 size_t pre_filter_item_count_; // After word index is queried. | 299 size_t pre_filter_item_count_; // After word index is queried. |
290 size_t post_filter_item_count_; // After trimming large result set. | 300 size_t post_filter_item_count_; // After trimming large result set. |
291 size_t post_scoring_item_count_; // After performing final filter/scoring. | 301 size_t post_scoring_item_count_; // After performing final filter/scoring. |
292 }; | 302 }; |
293 | 303 |
294 } // namespace history | 304 } // namespace history |
295 | 305 |
296 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 306 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
OLD | NEW |