Index: chrome/browser/history/in_memory_url_index.h |
=================================================================== |
--- chrome/browser/history/in_memory_url_index.h (revision 105497) |
+++ chrome/browser/history/in_memory_url_index.h (working copy) |
@@ -21,16 +21,15 @@ |
#include "chrome/browser/autocomplete/autocomplete_match.h" |
#include "chrome/browser/autocomplete/history_provider_util.h" |
#include "chrome/browser/history/history_types.h" |
+#include "chrome/browser/history/in_memory_url_index_types.h" |
#include "chrome/browser/history/in_memory_url_index_cache.pb.h" |
+#include "content/common/notification_observer.h" |
+#include "content/common/notification_registrar.h" |
#include "sql/connection.h" |
#include "testing/gtest/include/gtest/gtest_prod.h" |
class Profile; |
-namespace base { |
-class Time; |
-} |
- |
namespace in_memory_url_index { |
class InMemoryURLIndexCacheItem; |
} |
@@ -40,40 +39,10 @@ |
namespace imui = in_memory_url_index; |
class URLDatabase; |
+struct URLsDeletedDetails; |
+struct URLsModifiedDetails; |
+struct URLVisitedDetails; |
-// Specifies where an omnibox term occurs within a string. Used for specifying |
-// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in |
-// scoring a result. |
-struct TermMatch { |
- TermMatch(int term_num, size_t offset, size_t length) |
- : term_num(term_num), |
- offset(offset), |
- length(length) {} |
- |
- int term_num; // The index of the term in the original search string. |
- size_t offset; // The starting offset of the substring match. |
- size_t length; // The length of the substring match. |
-}; |
-typedef std::vector<TermMatch> TermMatches; |
- |
-// Used for intermediate history result operations. |
-struct ScoredHistoryMatch : public HistoryMatch { |
- ScoredHistoryMatch(); // Required by STL. |
- explicit ScoredHistoryMatch(const URLRow& url_info); |
- ~ScoredHistoryMatch(); |
- |
- static bool MatchScoreGreater(const ScoredHistoryMatch& m1, |
- const ScoredHistoryMatch& m2); |
- |
- // An interim score taking into consideration location and completeness |
- // of the match. |
- int raw_score; |
- TermMatches url_matches; // Term matches within the URL. |
- TermMatches title_matches; // Term matches within the page title. |
- bool can_inline; // True if this is a candidate for in-line autocompletion. |
-}; |
-typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; |
- |
// The URL history source. |
// Holds portions of the URL database in memory in an indexed form. Used to |
// quickly look up matching URLs for a given query string. Used by |
@@ -93,17 +62,15 @@ |
// will eliminate such words except in the case where a single character |
// is being searched on and which character occurs as the second char16 of a |
// multi-char16 instance. |
-class InMemoryURLIndex { |
+class InMemoryURLIndex : public NotificationObserver { |
public: |
// |history_dir| is a path to the directory containing the history database |
// within the profile wherein the cache and transaction journals will be |
// stored. |
- explicit InMemoryURLIndex(const FilePath& history_dir); |
- ~InMemoryURLIndex(); |
+ explicit InMemoryURLIndex(Profile* profile, |
+ const FilePath& history_dir); |
+ virtual ~InMemoryURLIndex(); |
- // Convenience types |
- typedef std::vector<string16> String16Vector; |
- |
// Opens and indexes the URL history database. |
// |languages| gives a list of language encodings with which the history |
// URLs and omnibox searches are interpreted, i.e. when each is broken |
@@ -143,37 +110,18 @@ |
// Updates or adds an history item to the index if it meets the minimum |
// 'quick' criteria. |
- void UpdateURL(URLID row_id, const URLRow& row); |
+ void UpdateURL(const URLRow& row); |
// Deletes indexing data for an history item. The item may not have actually |
// been indexed (which is the case if it did not previously meet minimum |
// 'quick' criteria). |
- void DeleteURL(URLID row_id); |
+ void DeleteURL(const URLRow& row); |
- // Breaks the |uni_string| string down into individual words and return |
- // a vector with the individual words in their original order. If |
- // |break_on_space| is false then the resulting list will contain only words |
- // containing alpha-numeric characters. If |break_on_space| is true then the |
- // resulting list will contain strings broken at whitespace. |
- // |
- // Example: |
- // Given: |uni_string|: "http://www.google.com/ harry the rabbit." |
- // With |break_on_space| false the returned list will contain: |
- // "http", "www", "google", "com", "harry", "the", "rabbit" |
- // With |break_on_space| true the returned list will contain: |
- // "http://", "www.google.com/", "harry", "the", "rabbit." |
- static String16Vector WordVectorFromString16(const string16& uni_string, |
- bool break_on_space); |
+ // NotificationObserver implementation. |
+ virtual void Observe(int type, |
+ const NotificationSource& source, |
+ const NotificationDetails& details); |
- // Extract and return the offsets from |matches|. |
- static std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches); |
- |
- // Replace the offsets in |matches| with those given in |offsets|, deleting |
- // any which are npos, and return the updated list of matches. |
- static TermMatches ReplaceOffsetsInTermMatches( |
- const TermMatches& matches, |
- const std::vector<size_t>& offsets); |
- |
private: |
friend class AddHistoryMatch; |
friend class InMemoryURLIndexTest; |
@@ -194,29 +142,6 @@ |
// Creating one of me without a history path is not allowed (tests excepted). |
InMemoryURLIndex(); |
- // Convenience types. |
- typedef std::set<string16> String16Set; |
- typedef std::set<char16> Char16Set; |
- typedef std::vector<char16> Char16Vector; |
- |
- // An index into list of all of the words we have indexed. |
- typedef int WordID; |
- |
- // A map allowing a WordID to be determined given a word. |
- typedef std::map<string16, WordID> WordMap; |
- |
- // A map from character to word_ids. |
- typedef std::set<WordID> WordIDSet; // An index into the WordList. |
- typedef std::map<char16, WordIDSet> CharWordIDMap; |
- |
- // A map from word_id to history item. |
- // TODO(mrossetti): URLID is 64 bit: a memory bloat and performance hit. |
- // Consider using a smaller type. |
- typedef URLID HistoryID; |
- typedef std::set<HistoryID> HistoryIDSet; |
- typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap; |
- |
- |
// Support caching of term results so that we can optimize searches which |
// build upon a previous search. Each entry in this map represents one |
// search term from the most recent search. For example, if the user had |
@@ -248,12 +173,6 @@ |
}; |
typedef std::map<string16, SearchTermCacheItem> SearchTermCacheMap; |
- // TODO(rohitrao): Probably replace this with QueryResults. |
- typedef std::vector<URLRow> URLRowVector; |
- |
- // A map from history_id to the history's URL and title. |
- typedef std::map<HistoryID, URLRow> HistoryInfoMap; |
- |
// A helper class which performs the final filter on each candidate |
// history URL match, inserting accepted matches into |scored_matches_| |
// and trimming the maximum number of matches to 10. |
@@ -280,35 +199,20 @@ |
// Initializes the whitelist of URL schemes. |
static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); |
- // Breaks a string down into individual words. |
- static String16Set WordSetFromString16(const string16& uni_string); |
- |
- // Given a set of Char16s, finds words containing those characters. |
- WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
- |
- // Creates a TermMatches which has an entry for each occurrence of the string |
- // |term| found in the string |string|. Mark each match with |term_num| so |
- // that the resulting TermMatches can be merged with other TermMatches for |
- // other terms. |
- static TermMatches MatchTermInString(const string16& term, |
- const string16& string, |
- int term_num); |
- |
// URL History indexing support functions. |
// Indexes one URL history item. |
bool IndexRow(const URLRow& row); |
- // Breaks the |uni_word| string down into its individual characters. |
- // Note that this is temporarily intended to work on a single word, but |
- // _will_ work on a string of words, perhaps with unexpected results. |
- // TODO(mrossetti): Lots of optimizations possible here for not restarting |
- // a search if the user is just typing along. Also, change this to uniString |
- // and properly handle substring matches, scoring and sorting the results |
- // by score. Also, provide the metrics for where the matches occur so that |
- // the UI can highlight the matched sections. |
- static Char16Set Char16SetFromString16(const string16& uni_word); |
+ // Parses and indexes the words in the URL and page title of |row|. |
+ void AddRowWordsToIndex(const URLRow& row); |
+ // Removes |row| and all associated words and characters from the index. |
+ void RemoveRowFromIndex(const URLRow& row); |
+ |
+ // Removes all words and characters associated with |row| from the index. |
+ void RemoveRowWordsFromIndex(const URLRow& row); |
+ |
// Given a single word in |uni_word|, adds a reference for the containing |
// history item identified by |history_id| to the index. |
void AddWordToIndex(const string16& uni_word, HistoryID history_id); |
@@ -352,13 +256,14 @@ |
static int ScoreComponentForMatches(const TermMatches& matches, |
size_t max_length); |
- // Sorts and removes overlapping substring matches from |matches| and |
- // returns the cleaned up matches. |
- static TermMatches SortAndDeoverlap(const TermMatches& matches); |
- |
// Determines if |gurl| has a whitelisted scheme and returns true if so. |
bool URLSchemeIsWhitelisted(const GURL& gurl) const; |
+ // Notification handlers. |
+ void OnURLVisited(const URLVisitedDetails* details); |
+ void OnURLsModified(const URLsModifiedDetails* details); |
+ void OnURLsDeleted(const URLsDeletedDetails* details); |
+ |
// Utility functions supporting RestoreFromCache and SaveToCache. |
// Construct a file path for the cache file within the same directory where |
@@ -384,6 +289,8 @@ |
bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); |
bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); |
+ NotificationRegistrar registrar_; |
+ |
// Directory where cache file resides. This is, except when unit testing, |
// the same directory in which the profile's history database is found. It |
// should never be empty. |
@@ -395,22 +302,9 @@ |
// the InMemoryURLIndex was last populated. |
base::Time last_saved_; |
- // A list of all of indexed words. The index of a word in this list is the |
- // ID of the word in the word_map_. It reduces the memory overhead by |
- // replacing a potentially long and repeated string with a simple index. |
- // NOTE: A word will _never_ be removed from this vector thus insuring |
- // the immutability of the word_id throughout the session, reducing |
- // maintenance complexity. |
- // TODO(mrossetti): Profile the vector allocation and determine if judicious |
- // 'reserve' calls are called for. |
- String16Vector word_list_; |
+ // The index's durable private data. |
+ scoped_ptr<URLIndexPrivateData> private_data_; |
- int history_item_count_; |
- WordMap word_map_; |
- CharWordIDMap char_word_map_; |
- WordIDHistoryMap word_id_history_map_; |
- HistoryInfoMap history_info_map_; |
- |
// Cache of search terms. |
SearchTermCacheMap search_term_cache_; |