| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| 7 #pragma once | 7 #pragma once |
| 8 | 8 |
| 9 #include <functional> | 9 #include <functional> |
| 10 #include <map> | 10 #include <map> |
| 11 #include <set> | 11 #include <set> |
| 12 #include <string> | 12 #include <string> |
| 13 #include <vector> | 13 #include <vector> |
| 14 | 14 |
| 15 #include "app/sql/connection.h" | 15 #include "app/sql/connection.h" |
| 16 #include "base/basictypes.h" | 16 #include "base/basictypes.h" |
| 17 #include "base/file_path.h" | 17 #include "base/file_path.h" |
| 18 #include "base/gtest_prod_util.h" | 18 #include "base/gtest_prod_util.h" |
| 19 #include "base/linked_ptr.h" | 19 #include "base/linked_ptr.h" |
| 20 #include "base/scoped_ptr.h" | 20 #include "base/scoped_ptr.h" |
| 21 #include "base/string16.h" | 21 #include "base/string16.h" |
| 22 #include "chrome/browser/autocomplete/autocomplete_match.h" |
| 22 #include "chrome/browser/autocomplete/history_provider_util.h" | 23 #include "chrome/browser/autocomplete/history_provider_util.h" |
| 23 #include "chrome/browser/history/history_types.h" | 24 #include "chrome/browser/history/history_types.h" |
| 24 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" | 25 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" |
| 25 #include "testing/gtest/include/gtest/gtest_prod.h" | 26 #include "testing/gtest/include/gtest/gtest_prod.h" |
| 26 | 27 |
| 27 class Profile; | 28 class Profile; |
| 28 | 29 |
| 29 namespace base { | 30 namespace base { |
| 30 class Time; | 31 class Time; |
| 31 } | 32 } |
| 32 | 33 |
| 33 namespace in_memory_url_index { | 34 namespace in_memory_url_index { |
| 34 class InMemoryURLIndexCacheItem; | 35 class InMemoryURLIndexCacheItem; |
| 35 } | 36 } |
| 36 | 37 |
| 37 namespace history { | 38 namespace history { |
| 38 | 39 |
| 39 namespace imui = in_memory_url_index; | 40 namespace imui = in_memory_url_index; |
| 40 | 41 |
| 41 class URLDatabase; | 42 class URLDatabase; |
| 42 | 43 |
| 44 // Specifies where an omnibox term occurs within a string. Used for specifying |
| 45 // highlights in AutocompleteMatches (ACMatchClassifications) and to assist in |
| 46 // scoring a result. |
| 47 struct TermMatch { |
| 48 TermMatch(int term_num, size_t offset, size_t length) |
| 49 : term_num(term_num), |
| 50 offset(offset), |
| 51 length(length) {} |
| 52 |
| 53 int term_num; // The index of the term in the original search string. |
| 54 size_t offset; // The starting offset of the substring match. |
| 55 size_t length; // The length of the substring match. |
| 56 }; |
| 57 typedef std::vector<TermMatch> TermMatches; |
| 58 |
| 43 // Used for intermediate history result operations. | 59 // Used for intermediate history result operations. |
| 44 struct ScoredHistoryMatch : public HistoryMatch { | 60 struct ScoredHistoryMatch : public HistoryMatch { |
| 45 // Required for STL, we don't use this directly. | 61 ScoredHistoryMatch(); // Required by STL. |
| 46 ScoredHistoryMatch(); | 62 explicit ScoredHistoryMatch(const URLRow& url_info); |
| 47 | |
| 48 ScoredHistoryMatch(const URLRow& url_info, | |
| 49 size_t input_location, | |
| 50 bool match_in_scheme, | |
| 51 bool innermost_match, | |
| 52 int score); | |
| 53 | 63 |
| 54 // An interim score taking into consideration location and completeness | 64 // An interim score taking into consideration location and completeness |
| 55 // of the match. | 65 // of the match. |
| 56 int raw_score; | 66 int raw_score; |
| 67 TermMatches url_matches; // Term matches within the URL. |
| 68 TermMatches title_matches; // Term matches within the page title. |
| 69 size_t prefix_adjust; // The length of a prefix which should be ignored. |
| 57 }; | 70 }; |
| 58 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; | 71 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; |
| 59 | 72 |
| 60 // The URL history source. | 73 // The URL history source. |
| 61 // Holds portions of the URL database in memory in an indexed form. Used to | 74 // Holds portions of the URL database in memory in an indexed form. Used to |
| 62 // quickly look up matching URLs for a given query string. Used by | 75 // quickly look up matching URLs for a given query string. Used by |
| 63 // the HistoryURLProvider for inline autocomplete and to provide URL | 76 // the HistoryURLProvider for inline autocomplete and to provide URL |
| 64 // matches to the omnibox. | 77 // matches to the omnibox. |
| 65 // | 78 // |
| 66 // Note about multi-byte codepoints and the data structures in the | 79 // Note about multi-byte codepoints and the data structures in the |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 107 bool RestoreFromCacheFile(); | 120 bool RestoreFromCacheFile(); |
| 108 | 121 |
| 109 // Caches the index private data and writes the cache file to the profile | 122 // Caches the index private data and writes the cache file to the profile |
| 110 // directory. | 123 // directory. |
| 111 bool SaveToCacheFile(); | 124 bool SaveToCacheFile(); |
| 112 | 125 |
| 113 // Given a vector containing one or more words as string16s, scans the | 126 // Given a vector containing one or more words as string16s, scans the |
| 114 // history index and return a vector with all scored, matching history items. | 127 // history index and return a vector with all scored, matching history items. |
| 115 // Each term must occur somewhere in the history item for the item to | 128 // Each term must occur somewhere in the history item for the item to |
| 116 // qualify; however, the terms do not necessarily have to be adjacent. | 129 // qualify; however, the terms do not necessarily have to be adjacent. |
| 117 // Results are sorted with higher scoring items first. | 130 // Results are sorted with higher scoring items first. Each term from |terms| |
| 131 // may contain punctuation but should not contain spaces. |
| 118 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms); | 132 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms); |
| 119 | 133 |
| 120 // Returns the date threshold for considering an history item as significant. | |
| 121 static base::Time RecentThreshold(); | |
| 122 | |
| 123 // Updates or adds an history item to the index if it meets the minimum | 134 // Updates or adds an history item to the index if it meets the minimum |
| 124 // 'quick' criteria. | 135 // 'quick' criteria. |
| 125 void UpdateURL(URLID row_id, const URLRow& row); | 136 void UpdateURL(URLID row_id, const URLRow& row); |
| 126 | 137 |
| 127 // Deletes indexing data for an history item. The item may not have actually | 138 // Deletes indexing data for an history item. The item may not have actually |
| 128 // been indexed (which is the case if it did not previously meet minimum | 139 // been indexed (which is the case if it did not previously meet minimum |
| 129 // 'quick' criteria). | 140 // 'quick' criteria). |
| 130 void DeleteURL(URLID row_id); | 141 void DeleteURL(URLID row_id); |
| 131 | 142 |
| 143 // Breaks the |uni_string| string down into individual words and return |
| 144 // a vector with the individual words in their original order. Break on |
| 145 // whitespace if |break_on_space| also on special characters. |
| 146 static String16Vector WordVectorFromString16(const string16& uni_string, |
| 147 bool break_on_space); |
| 148 |
| 132 private: | 149 private: |
| 133 friend class AddHistoryMatch; | 150 friend class AddHistoryMatch; |
| 134 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Initialization); | 151 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); |
| 135 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); | |
| 136 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); | |
| 137 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); | 152 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); |
| 138 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); | 153 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
| 154 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); |
| 155 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); |
| 156 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions); |
| 157 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
| 158 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); |
| 139 | 159 |
| 140 // Signals that there are no previously cached results for the typed term. | 160 // Signals that there are no previously cached results for the typed term. |
| 141 static const size_t kNoCachedResultForTerm; | 161 static const size_t kNoCachedResultForTerm; |
| 142 | 162 |
| 143 // Creating one of me without a history path is not allowed (tests excepted). | 163 // Creating one of me without a history path is not allowed (tests excepted). |
| 144 InMemoryURLIndex(); | 164 InMemoryURLIndex(); |
| 145 | 165 |
| 146 // Convenience types. | 166 // Convenience types. |
| 147 typedef std::set<string16> String16Set; | 167 typedef std::set<string16> String16Set; |
| 148 typedef std::set<char16> Char16Set; | 168 typedef std::set<char16> Char16Set; |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 217 WordIDSet WordIDSetForTermChars(const Char16Vector& uni_chars); | 237 WordIDSet WordIDSetForTermChars(const Char16Vector& uni_chars); |
| 218 | 238 |
| 219 // Given a vector of Char16s in |uni_chars|, compare those characters, in | 239 // Given a vector of Char16s in |uni_chars|, compare those characters, in |
| 220 // order, with the previously searched term, returning the index of the | 240 // order, with the previously searched term, returning the index of the |
| 221 // cached results in the term_char_word_set_cache_ of the set with best | 241 // cached results in the term_char_word_set_cache_ of the set with best |
| 222 // matching number of characters. Returns kNoCachedResultForTerm if there | 242 // matching number of characters. Returns kNoCachedResultForTerm if there |
| 223 // was no match at all (i.e. the first character of |uni-chars| is not the | 243 // was no match at all (i.e. the first character of |uni-chars| is not the |
| 224 // same as the character of the first entry in term_char_word_set_cache_). | 244 // same as the character of the first entry in term_char_word_set_cache_). |
| 225 size_t CachedResultsIndexForTerm(const Char16Vector& uni_chars); | 245 size_t CachedResultsIndexForTerm(const Char16Vector& uni_chars); |
| 226 | 246 |
| 247 // Creates a TermMatches which has an entry for each occurrence of the string |
| 248 // |term| found in the string |string|. Mark each match with |term_num| so |
| 249 // that the resulting TermMatches can be merged with other TermMatches for |
| 250 // other terms. |
| 251 static TermMatches MatchTermInString(const string16& term, |
| 252 const string16& string, |
| 253 int term_num); |
| 254 |
| 227 // URL History indexing support functions. | 255 // URL History indexing support functions. |
| 228 | 256 |
| 229 // Indexes one URL history item. | 257 // Indexes one URL history item. |
| 230 bool IndexRow(const URLRow& row); | 258 bool IndexRow(const URLRow& row); |
| 231 | 259 |
| 232 // Breaks a string down into unique, individual characters in the order | 260 // Breaks a string down into unique, individual characters in the order |
| 233 // in which the characters are first encountered in the |uni_word| string. | 261 // in which the characters are first encountered in the |uni_word| string. |
| 234 static Char16Vector Char16VectorFromString16(const string16& uni_word); | 262 static Char16Vector Char16VectorFromString16(const string16& uni_word); |
| 235 | 263 |
| 236 // Breaks the |uni_word| string down into its individual characters. | 264 // Breaks the |uni_word| string down into its individual characters. |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 269 HistoryIDSet HistoryIDsForTerm(const string16& uni_word); | 297 HistoryIDSet HistoryIDsForTerm(const string16& uni_word); |
| 270 | 298 |
| 271 // Calculates a raw score for this history item by first determining | 299 // Calculates a raw score for this history item by first determining |
| 272 // if all of the terms in |terms_vector| occur in |row| and, if so, | 300 // if all of the terms in |terms_vector| occur in |row| and, if so, |
| 273 // calculating a raw score based on 1) starting position of each term | 301 // calculating a raw score based on 1) starting position of each term |
| 274 // in the user input, 2) completeness of each term's match, 3) ordering | 302 // in the user input, 2) completeness of each term's match, 3) ordering |
| 275 // of the occurrence of each term (i.e. they appear in order), 4) last | 303 // of the occurrence of each term (i.e. they appear in order), 4) last |
| 276 // visit time, and 5) number of visits. | 304 // visit time, and 5) number of visits. |
| 277 // This raw score allows the results to be ordered and can be used | 305 // This raw score allows the results to be ordered and can be used |
| 278 // to influence the final score calculated by the client of this | 306 // to influence the final score calculated by the client of this |
| 279 // index. Return the starting location of the first term in | 307 // index. Returns a ScoredHistoryMatch structure with the raw score and |
| 280 // |first_term_location|. | 308 // substring matching metrics. |
| 281 static int RawScoreForURL(const URLRow& row, | 309 static ScoredHistoryMatch ScoredMatchForURL( |
| 282 const String16Vector& terms_vector, | 310 const URLRow& row, |
| 283 size_t* first_term_location); | 311 const String16Vector& terms_vector); |
| 312 |
| 313 // Calculates a partial raw score based on position, ordering and total |
| 314 // substring match size using metrics recorded in |matches|. |max_length| |
| 315 // is the length of the string against which the terms are being searched. |
| 316 static int RawScoreForMatches(const TermMatches& matches, |
| 317 size_t max_length); |
| 318 |
| 319 // Sorts and removes overlapping substring matches from |matches| and |
| 320 // returns the cleaned up matches. |
| 321 static TermMatches SortAndDeoverlap(const TermMatches& matches); |
| 284 | 322 |
| 285 // Utility functions supporting RestoreFromCache and SaveToCache. | 323 // Utility functions supporting RestoreFromCache and SaveToCache. |
| 286 | 324 |
| 287 // Construct a file path for the cache file within the same directory where | 325 // Construct a file path for the cache file within the same directory where |
| 288 // the history database is kept and saves that path to |file_path|. Returns | 326 // the history database is kept and saves that path to |file_path|. Returns |
| 289 // true if |file_path| can be successfully constructed. (This function | 327 // true if |file_path| can be successfully constructed. (This function |
| 290 // provided as a hook for unit testing.) | 328 // provided as a hook for unit testing.) |
| 291 bool GetCacheFilePath(FilePath* file_path); | 329 bool GetCacheFilePath(FilePath* file_path); |
| 292 | 330 |
| 293 // Encode a data structure into the protobuf |cache|. | 331 // Encode a data structure into the protobuf |cache|. |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 335 TermCharWordSetVector term_char_word_set_cache_; | 373 TermCharWordSetVector term_char_word_set_cache_; |
| 336 HistoryInfoMap history_info_map_; | 374 HistoryInfoMap history_info_map_; |
| 337 std::string languages_; | 375 std::string languages_; |
| 338 | 376 |
| 339 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); | 377 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); |
| 340 }; | 378 }; |
| 341 | 379 |
| 342 } // namespace history | 380 } // namespace history |
| 343 | 381 |
| 344 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 382 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| OLD | NEW |