Chromium Code Reviews| Index: chrome/browser/history/in_memory_url_index.h |
| =================================================================== |
| --- chrome/browser/history/in_memory_url_index.h (revision 103712) |
| +++ chrome/browser/history/in_memory_url_index.h (working copy) |
| @@ -21,16 +21,15 @@ |
| #include "chrome/browser/autocomplete/autocomplete_match.h" |
| #include "chrome/browser/autocomplete/history_provider_util.h" |
| #include "chrome/browser/history/history_types.h" |
| +#include "chrome/browser/history/in_memory_url_index_types.h" |
| #include "chrome/browser/history/in_memory_url_index_cache.pb.h" |
| +#include "content/common/notification_observer.h" |
| +#include "content/common/notification_registrar.h" |
| #include "sql/connection.h" |
| #include "testing/gtest/include/gtest/gtest_prod.h" |
| class Profile; |
| -namespace base { |
| -class Time; |
| -} |
| - |
| namespace in_memory_url_index { |
| class InMemoryURLIndexCacheItem; |
| } |
| @@ -40,40 +39,10 @@ |
| namespace imui = in_memory_url_index; |
| class URLDatabase; |
| +struct URLsDeletedDetails; |
| +struct URLsModifiedDetails; |
| +struct URLVisitedDetails; |
| -// Specifies where an omnibox term occurs within a string. Used for specifying |
| -// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in |
| -// scoring a result. |
| -struct TermMatch { |
| - TermMatch(int term_num, size_t offset, size_t length) |
| - : term_num(term_num), |
| - offset(offset), |
| - length(length) {} |
| - |
| - int term_num; // The index of the term in the original search string. |
| - size_t offset; // The starting offset of the substring match. |
| - size_t length; // The length of the substring match. |
| -}; |
| -typedef std::vector<TermMatch> TermMatches; |
| - |
| -// Used for intermediate history result operations. |
| -struct ScoredHistoryMatch : public HistoryMatch { |
| - ScoredHistoryMatch(); // Required by STL. |
| - explicit ScoredHistoryMatch(const URLRow& url_info); |
| - ~ScoredHistoryMatch(); |
| - |
| - static bool MatchScoreGreater(const ScoredHistoryMatch& m1, |
| - const ScoredHistoryMatch& m2); |
| - |
| - // An interim score taking into consideration location and completeness |
| - // of the match. |
| - int raw_score; |
| - TermMatches url_matches; // Term matches within the URL. |
| - TermMatches title_matches; // Term matches within the page title. |
| - bool can_inline; // True if this is a candidate for in-line autocompletion. |
| -}; |
| -typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; |
| - |
| // The URL history source. |
| // Holds portions of the URL database in memory in an indexed form. Used to |
| // quickly look up matching URLs for a given query string. Used by |
| @@ -93,17 +62,15 @@ |
| // will eliminate such words except in the case where a single character |
| // is being searched on and which character occurs as the second char16 of a |
| // multi-char16 instance. |
| -class InMemoryURLIndex { |
| +class InMemoryURLIndex : public NotificationObserver { |
| public: |
| // |history_dir| is a path to the directory containing the history database |
| // within the profile wherein the cache and transaction journals will be |
| // stored. |
| - explicit InMemoryURLIndex(const FilePath& history_dir); |
| - ~InMemoryURLIndex(); |
| + explicit InMemoryURLIndex(Profile* profile, |
| + const FilePath& history_dir); |
| + virtual ~InMemoryURLIndex(); |
| - // Convenience types |
| - typedef std::vector<string16> String16Vector; |
| - |
| // Opens and indexes the URL history database. |
| // |languages| gives a list of language encodings with which the history |
| // URLs and omnibox searches are interpreted, i.e. when each is broken |
| @@ -143,37 +110,18 @@ |
| // Updates or adds a history item to the index if it meets the minimum |
| // 'quick' criteria. |
| - void UpdateURL(URLID row_id, const URLRow& row); |
| + void UpdateURL(const URLRow& row); |
| // Deletes indexing data for a history item. The item may not have actually |
| // been indexed (which is the case if it did not previously meet minimum |
| // 'quick' criteria). |
| - void DeleteURL(URLID row_id); |
| + void DeleteURL(const URLRow& row); |
| - // Breaks the |uni_string| string down into individual words and return |
| - // a vector with the individual words in their original order. If |
| - // |break_on_space| is false then the resulting list will contain only words |
| - // containing alpha-numeric characters. If |break_on_space| is true then the |
| - // resulting list will contain strings broken at whitespace. |
| - // |
| - // Example: |
| - // Given: |uni_string|: "http://www.google.com/ harry the rabbit." |
| - // With |break_on_space| false the returned list will contain: |
| - // "http", "www", "google", "com", "harry", "the", "rabbit" |
| - // With |break_on_space| true the returned list will contain: |
| - // "http://", "www.google.com/", "harry", "the", "rabbit." |
| - static String16Vector WordVectorFromString16(const string16& uni_string, |
| - bool break_on_space); |
| + // Notification callback. |
| + virtual void Observe(int type, |
| + const NotificationSource& source, |
| + const NotificationDetails& details); |
| - // Extract and return the offsets from |matches|. |
| - static std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches); |
| - |
| - // Replace the offsets in |matches| with those given in |offsets|, deleting |
| - // any which are npos, and return the updated list of matches. |
| - static TermMatches ReplaceOffsetsInTermMatches( |
| - const TermMatches& matches, |
| - const std::vector<size_t>& offsets); |
| - |
| private: |
| friend class AddHistoryMatch; |
| friend class InMemoryURLIndexTest; |
| @@ -194,29 +142,6 @@ |
| // Creating one of me without a history path is not allowed (tests excepted). |
| InMemoryURLIndex(); |
| - // Convenience types. |
| - typedef std::set<string16> String16Set; |
| - typedef std::set<char16> Char16Set; |
| - typedef std::vector<char16> Char16Vector; |
| - |
| - // An index into list of all of the words we have indexed. |
| - typedef int WordID; |
| - |
| - // A map allowing a WordID to be determined given a word. |
| - typedef std::map<string16, WordID> WordMap; |
| - |
| - // A map from character to word_ids. |
| - typedef std::set<WordID> WordIDSet; // An index into the WordList. |
| - typedef std::map<char16, WordIDSet> CharWordIDMap; |
| - |
| - // A map from word_id to history item. |
| - // TODO(mrossetti): URLID is 64 bit: a memory bloat and performance hit. |
| - // Consider using a smaller type. |
| - typedef URLID HistoryID; |
| - typedef std::set<HistoryID> HistoryIDSet; |
| - typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap; |
| - |
| - |
| // Support caching of term results so that we can optimize searches which |
| // build upon a previous search. Each entry in this map represents one |
| // search term from the most recent search. For example, if the user had |
| @@ -248,12 +173,6 @@ |
| }; |
| typedef std::map<string16, SearchTermCacheItem> SearchTermCacheMap; |
| - // TODO(rohitrao): Probably replace this with QueryResults. |
| - typedef std::vector<URLRow> URLRowVector; |
| - |
| - // A map from history_id to the history's URL and title. |
| - typedef std::map<HistoryID, URLRow> HistoryInfoMap; |
| - |
| // A helper class which performs the final filter on each candidate |
| // history URL match, inserting accepted matches into |scored_matches_| |
| // and trimming the maximum number of matches to 10. |
| @@ -280,35 +199,20 @@ |
| // Initializes the whitelist of URL schemes. |
| static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); |
| - // Breaks a string down into individual words. |
| - static String16Set WordSetFromString16(const string16& uni_string); |
| - |
| - // Given a set of Char16s, finds words containing those characters. |
| - WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
| - |
| - // Creates a TermMatches which has an entry for each occurrence of the string |
| - // |term| found in the string |string|. Mark each match with |term_num| so |
| - // that the resulting TermMatches can be merged with other TermMatches for |
| - // other terms. |
| - static TermMatches MatchTermInString(const string16& term, |
| - const string16& string, |
| - int term_num); |
| - |
| // URL History indexing support functions. |
| // Indexes one URL history item. |
| bool IndexRow(const URLRow& row); |
| - // Breaks the |uni_word| string down into its individual characters. |
| - // Note that this is temporarily intended to work on a single word, but |
| - // _will_ work on a string of words, perhaps with unexpected results. |
| - // TODO(mrossetti): Lots of optimizations possible here for not restarting |
| - // a search if the user is just typing along. Also, change this to uniString |
| - // and properly handle substring matches, scoring and sorting the results |
| - // by score. Also, provide the metrics for where the matches occur so that |
| - // the UI can highlight the matched sections. |
| - static Char16Set Char16SetFromString16(const string16& uni_word); |
| + // Parses and indexes the words in the URL and page title of |row|. |
| + void AddRowWordsToIndex(const URLRow& row); |
| + // Removes |row| and all associated words and characters from the index. |
| + void RemoveRowFromIndex(const URLRow& row); |
| + |
| + // Removes all words and characters associated with |row| from the index. |
| + void RemoveRowWordsFromIndex(const URLRow& row); |
| + |
| // Given a single word in |uni_word|, adds a reference for the containing |
| // history item identified by |history_id| to the index. |
| void AddWordToIndex(const string16& uni_word, HistoryID history_id); |
| @@ -352,13 +256,14 @@ |
| static int ScoreComponentForMatches(const TermMatches& matches, |
| size_t max_length); |
| - // Sorts and removes overlapping substring matches from |matches| and |
| - // returns the cleaned up matches. |
| - static TermMatches SortAndDeoverlap(const TermMatches& matches); |
| - |
| // Determines if |gurl| has a whitelisted scheme and returns true if so. |
| bool URLSchemeIsWhitelisted(const GURL& gurl) const; |
| + // Notification handlers. |
| + void OnURLsVisited(const URLVisitedDetails& details); |
|
Peter Kasting
2011/10/05 00:11:42
Nit: Should be singular?
mrossetti
2011/10/07 17:04:14
Yep, changed.
On 2011/10/05 00:11:42, Peter Kasting
|
| + void OnURLsModified(const URLsModifiedDetails& details); |
| + void OnURLsDeleted(const URLsDeletedDetails& details); |
| + |
| // Utility functions supporting RestoreFromCache and SaveToCache. |
| // Construct a file path for the cache file within the same directory where |
| @@ -384,6 +289,9 @@ |
| bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); |
| bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); |
| + Profile* profile_; // The profile may be NULL during unittests. |
| + NotificationRegistrar registrar_; |
| + |
| // Directory where cache file resides. This is, except when unit testing, |
| // the same directory in which the profile's history database is found. It |
| // should never be empty. |
| @@ -395,22 +303,9 @@ |
| // the InMemoryURLIndex was last populated. |
| base::Time last_saved_; |
| - // A list of all of indexed words. The index of a word in this list is the |
| - // ID of the word in the word_map_. It reduces the memory overhead by |
| - // replacing a potentially long and repeated string with a simple index. |
| - // NOTE: A word will _never_ be removed from this vector thus insuring |
| - // the immutability of the word_id throughout the session, reducing |
| - // maintenance complexity. |
| - // TODO(mrossetti): Profile the vector allocation and determine if judicious |
| - // 'reserve' calls are called for. |
| - String16Vector word_list_; |
| + // The index's durable private data. |
| + scoped_ptr<URLIndexPrivateData> private_data_; |
| - int history_item_count_; |
| - WordMap word_map_; |
| - CharWordIDMap char_word_map_; |
| - WordIDHistoryMap word_id_history_map_; |
| - HistoryInfoMap history_info_map_; |
| - |
| // Cache of search terms. |
| SearchTermCacheMap search_term_cache_; |