| Index: chrome/browser/history/in_memory_url_index.h
|
| ===================================================================
|
| --- chrome/browser/history/in_memory_url_index.h (revision 106401)
|
| +++ chrome/browser/history/in_memory_url_index.h (working copy)
|
| @@ -21,6 +21,7 @@
|
| #include "chrome/browser/autocomplete/autocomplete_match.h"
|
| #include "chrome/browser/autocomplete/history_provider_util.h"
|
| #include "chrome/browser/history/history_types.h"
|
| +#include "chrome/browser/history/in_memory_url_index_types.h"
|
| #include "chrome/browser/history/in_memory_url_index_cache.pb.h"
|
| #include "sql/connection.h"
|
| #include "testing/gtest/include/gtest/gtest_prod.h"
|
| @@ -41,39 +42,6 @@
|
|
|
| class URLDatabase;
|
|
|
| -// Specifies where an omnibox term occurs within a string. Used for specifying
|
| -// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in
|
| -// scoring a result.
|
| -struct TermMatch {
|
| - TermMatch(int term_num, size_t offset, size_t length)
|
| - : term_num(term_num),
|
| - offset(offset),
|
| - length(length) {}
|
| -
|
| - int term_num; // The index of the term in the original search string.
|
| - size_t offset; // The starting offset of the substring match.
|
| - size_t length; // The length of the substring match.
|
| -};
|
| -typedef std::vector<TermMatch> TermMatches;
|
| -
|
| -// Used for intermediate history result operations.
|
| -struct ScoredHistoryMatch : public HistoryMatch {
|
| - ScoredHistoryMatch(); // Required by STL.
|
| - explicit ScoredHistoryMatch(const URLRow& url_info);
|
| - ~ScoredHistoryMatch();
|
| -
|
| - static bool MatchScoreGreater(const ScoredHistoryMatch& m1,
|
| - const ScoredHistoryMatch& m2);
|
| -
|
| - // An interim score taking into consideration location and completeness
|
| - // of the match.
|
| - int raw_score;
|
| - TermMatches url_matches; // Term matches within the URL.
|
| - TermMatches title_matches; // Term matches within the page title.
|
| - bool can_inline; // True if this is a candidate for in-line autocompletion.
|
| -};
|
| -typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches;
|
| -
|
| // The URL history source.
|
| // Holds portions of the URL database in memory in an indexed form. Used to
|
| // quickly look up matching URLs for a given query string. Used by
|
| @@ -99,11 +67,8 @@
|
| // within the profile wherein the cache and transaction journals will be
|
| // stored.
|
| explicit InMemoryURLIndex(const FilePath& history_dir);
|
| - ~InMemoryURLIndex();
|
| + virtual ~InMemoryURLIndex();
|
|
|
| - // Convenience types
|
| - typedef std::vector<string16> String16Vector;
|
| -
|
| // Opens and indexes the URL history database.
|
| // |languages| gives a list of language encodings with which the history
|
| // URLs and omnibox searches are interpreted, i.e. when each is broken
|
| @@ -150,30 +115,6 @@
|
| // 'quick' criteria).
|
| void DeleteURL(URLID row_id);
|
|
|
| - // Breaks the |uni_string| string down into individual words and return
|
| - // a vector with the individual words in their original order. If
|
| - // |break_on_space| is false then the resulting list will contain only words
|
| - // containing alpha-numeric characters. If |break_on_space| is true then the
|
| - // resulting list will contain strings broken at whitespace.
|
| - //
|
| - // Example:
|
| - // Given: |uni_string|: "http://www.google.com/ harry the rabbit."
|
| - // With |break_on_space| false the returned list will contain:
|
| - // "http", "www", "google", "com", "harry", "the", "rabbit"
|
| - // With |break_on_space| true the returned list will contain:
|
| - // "http://", "www.google.com/", "harry", "the", "rabbit."
|
| - static String16Vector WordVectorFromString16(const string16& uni_string,
|
| - bool break_on_space);
|
| -
|
| - // Extract and return the offsets from |matches|.
|
| - static std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches);
|
| -
|
| - // Replace the offsets in |matches| with those given in |offsets|, deleting
|
| - // any which are npos, and return the updated list of matches.
|
| - static TermMatches ReplaceOffsetsInTermMatches(
|
| - const TermMatches& matches,
|
| - const std::vector<size_t>& offsets);
|
| -
|
| private:
|
| friend class AddHistoryMatch;
|
| friend class InMemoryURLIndexTest;
|
| @@ -185,6 +126,7 @@
|
| FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring);
|
| FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions);
|
| FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch);
|
| + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleChange);
|
| FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching);
|
| FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs);
|
|
|
| @@ -194,29 +136,6 @@
|
| // Creating one of me without a history path is not allowed (tests excepted).
|
| InMemoryURLIndex();
|
|
|
| - // Convenience types.
|
| - typedef std::set<string16> String16Set;
|
| - typedef std::set<char16> Char16Set;
|
| - typedef std::vector<char16> Char16Vector;
|
| -
|
| - // An index into list of all of the words we have indexed.
|
| - typedef int WordID;
|
| -
|
| - // A map allowing a WordID to be determined given a word.
|
| - typedef std::map<string16, WordID> WordMap;
|
| -
|
| - // A map from character to word_ids.
|
| - typedef std::set<WordID> WordIDSet; // An index into the WordList.
|
| - typedef std::map<char16, WordIDSet> CharWordIDMap;
|
| -
|
| - // A map from word_id to history item.
|
| - // TODO(mrossetti): URLID is 64 bit: a memory bloat and performance hit.
|
| - // Consider using a smaller type.
|
| - typedef URLID HistoryID;
|
| - typedef std::set<HistoryID> HistoryIDSet;
|
| - typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap;
|
| -
|
| -
|
| // Support caching of term results so that we can optimize searches which
|
| // build upon a previous search. Each entry in this map represents one
|
| // search term from the most recent search. For example, if the user had
|
| @@ -248,12 +167,6 @@
|
| };
|
| typedef std::map<string16, SearchTermCacheItem> SearchTermCacheMap;
|
|
|
| - // TODO(rohitrao): Probably replace this with QueryResults.
|
| - typedef std::vector<URLRow> URLRowVector;
|
| -
|
| - // A map from history_id to the history's URL and title.
|
| - typedef std::map<HistoryID, URLRow> HistoryInfoMap;
|
| -
|
| // A helper class which performs the final filter on each candidate
|
| // history URL match, inserting accepted matches into |scored_matches_|
|
| // and trimming the maximum number of matches to 10.
|
| @@ -280,35 +193,20 @@
|
| // Initializes the whitelist of URL schemes.
|
| static void InitializeSchemeWhitelist(std::set<std::string>* whitelist);
|
|
|
| - // Breaks a string down into individual words.
|
| - static String16Set WordSetFromString16(const string16& uni_string);
|
| -
|
| - // Given a set of Char16s, finds words containing those characters.
|
| - WordIDSet WordIDSetForTermChars(const Char16Set& term_chars);
|
| -
|
| - // Creates a TermMatches which has an entry for each occurrence of the string
|
| - // |term| found in the string |string|. Mark each match with |term_num| so
|
| - // that the resulting TermMatches can be merged with other TermMatches for
|
| - // other terms.
|
| - static TermMatches MatchTermInString(const string16& term,
|
| - const string16& string,
|
| - int term_num);
|
| -
|
| // URL History indexing support functions.
|
|
|
| // Indexes one URL history item.
|
| bool IndexRow(const URLRow& row);
|
|
|
| - // Breaks the |uni_word| string down into its individual characters.
|
| - // Note that this is temporarily intended to work on a single word, but
|
| - // _will_ work on a string of words, perhaps with unexpected results.
|
| - // TODO(mrossetti): Lots of optimizations possible here for not restarting
|
| - // a search if the user is just typing along. Also, change this to uniString
|
| - // and properly handle substring matches, scoring and sorting the results
|
| - // by score. Also, provide the metrics for where the matches occur so that
|
| - // the UI can highlight the matched sections.
|
| - static Char16Set Char16SetFromString16(const string16& uni_word);
|
| + // Parses and indexes the words in the URL and page title of |row|.
|
| + void AddRowWordsToIndex(const URLRow& row);
|
|
|
| + // Removes |row| and all associated words and characters from the index.
|
| + void RemoveRowFromIndex(const URLRow& row);
|
| +
|
| + // Removes all words and characters associated with |row| from the index.
|
| + void RemoveRowWordsFromIndex(const URLRow& row);
|
| +
|
| // Given a single word in |uni_word|, adds a reference for the containing
|
| // history item identified by |history_id| to the index.
|
| void AddWordToIndex(const string16& uni_word, HistoryID history_id);
|
| @@ -352,10 +250,6 @@
|
| static int ScoreComponentForMatches(const TermMatches& matches,
|
| size_t max_length);
|
|
|
| - // Sorts and removes overlapping substring matches from |matches| and
|
| - // returns the cleaned up matches.
|
| - static TermMatches SortAndDeoverlap(const TermMatches& matches);
|
| -
|
| // Determines if |gurl| has a whitelisted scheme and returns true if so.
|
| bool URLSchemeIsWhitelisted(const GURL& gurl) const;
|
|
|
| @@ -395,22 +289,9 @@
|
| // the InMemoryURLIndex was last populated.
|
| base::Time last_saved_;
|
|
|
| - // A list of all of indexed words. The index of a word in this list is the
|
| - // ID of the word in the word_map_. It reduces the memory overhead by
|
| - // replacing a potentially long and repeated string with a simple index.
|
| - // NOTE: A word will _never_ be removed from this vector thus insuring
|
| - // the immutability of the word_id throughout the session, reducing
|
| - // maintenance complexity.
|
| - // TODO(mrossetti): Profile the vector allocation and determine if judicious
|
| - // 'reserve' calls are called for.
|
| - String16Vector word_list_;
|
| + // The index's durable private data.
|
| + scoped_ptr<URLIndexPrivateData> private_data_;
|
|
|
| - int history_item_count_;
|
| - WordMap word_map_;
|
| - CharWordIDMap char_word_map_;
|
| - WordIDHistoryMap word_id_history_map_;
|
| - HistoryInfoMap history_info_map_;
|
| -
|
| // Cache of search terms.
|
| SearchTermCacheMap search_term_cache_;
|
|
|
|
|