chrome/browser/history/in_memory_url_index.h - Issue 8120004: HQP Refactoring (in Preparation for SQLite Cache)

Unified Diff: chrome/browser/history/in_memory_url_index.h

Issue 8120004: HQP Refactoring (in Preparation for SQLite Cache) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Another try at getting the patch to take. Created 9 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/browser/history/in_memory_url_index.h

===================================================================

--- chrome/browser/history/in_memory_url_index.h (revision 105354)

+++ chrome/browser/history/in_memory_url_index.h (working copy)

@@ -21,16 +21,15 @@

#include "chrome/browser/autocomplete/autocomplete_match.h"

#include "chrome/browser/autocomplete/history_provider_util.h"

#include "chrome/browser/history/history_types.h"

+#include "chrome/browser/history/in_memory_url_index_types.h"

#include "chrome/browser/history/in_memory_url_index_cache.pb.h"

+#include "content/common/notification_observer.h"

+#include "content/common/notification_registrar.h"

#include "sql/connection.h"

#include "testing/gtest/include/gtest/gtest_prod.h"

class Profile;

-namespace base {

-class Time;

namespace in_memory_url_index {

class InMemoryURLIndexCacheItem;

}

@@ -40,40 +39,10 @@

namespace imui = in_memory_url_index;

class URLDatabase;

+struct URLsDeletedDetails;

+struct URLsModifiedDetails;

+struct URLVisitedDetails;

-// Specifies where an omnibox term occurs within a string. Used for specifying

-// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in

-// scoring a result.

-struct TermMatch {

- TermMatch(int term_num, size_t offset, size_t length)

- : term_num(term_num),

- offset(offset),

- length(length) {}

- int term_num; // The index of the term in the original search string.

- size_t offset; // The starting offset of the substring match.

- size_t length; // The length of the substring match.

-};

-typedef std::vector<TermMatch> TermMatches;

-// Used for intermediate history result operations.

-struct ScoredHistoryMatch : public HistoryMatch {

- ScoredHistoryMatch(); // Required by STL.

- explicit ScoredHistoryMatch(const URLRow& url_info);

- ~ScoredHistoryMatch();

- static bool MatchScoreGreater(const ScoredHistoryMatch& m1,

- const ScoredHistoryMatch& m2);

- // An interim score taking into consideration location and completeness

- // of the match.

- int raw_score;

- TermMatches url_matches; // Term matches within the URL.

- TermMatches title_matches; // Term matches within the page title.

- bool can_inline; // True if this is a candidate for in-line autocompletion.

-};

-typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches;

// The URL history source.

// Holds portions of the URL database in memory in an indexed form. Used to

// quickly look up matching URLs for a given query string. Used by

@@ -93,17 +62,15 @@

// will eliminate such words except in the case where a single character

// is being searched on and which character occurs as the second char16 of a

// multi-char16 instance.

-class InMemoryURLIndex {

+class InMemoryURLIndex : public NotificationObserver {

public:

// |history_dir| is a path to the directory containing the history database

// within the profile wherein the cache and transaction journals will be

// stored.

- explicit InMemoryURLIndex(const FilePath& history_dir);

- ~InMemoryURLIndex();

+ explicit InMemoryURLIndex(Profile* profile,

+ const FilePath& history_dir);

+ virtual ~InMemoryURLIndex();

- // Convenience types

- typedef std::vector<string16> String16Vector;

// Opens and indexes the URL history database.

// |languages| gives a list of language encodings with which the history

// URLs and omnibox searches are interpreted, i.e. when each is broken

@@ -143,37 +110,18 @@

// Updates or adds a history item to the index if it meets the minimum

// 'quick' criteria.

- void UpdateURL(URLID row_id, const URLRow& row);

+ void UpdateURL(const URLRow& row);

// Deletes indexing data for a history item. The item may not have actually

// been indexed (which is the case if it did not previously meet minimum

// 'quick' criteria).

- void DeleteURL(URLID row_id);

+ void DeleteURL(const URLRow& row);

- // Breaks the |uni_string| string down into individual words and return

- // a vector with the individual words in their original order. If

- // |break_on_space| is false then the resulting list will contain only words

- // containing alpha-numeric characters. If |break_on_space| is true then the

- // resulting list will contain strings broken at whitespace.

- //

- // Example:

- // Given: |uni_string|: "http://www.google.com/ harry the rabbit."

- // With |break_on_space| false the returned list will contain:

- // "http", "www", "google", "com", "harry", "the", "rabbit"

- // With |break_on_space| true the returned list will contain:

- // "http://", "www.google.com/", "harry", "the", "rabbit."

- static String16Vector WordVectorFromString16(const string16& uni_string,

- bool break_on_space);

+ // Notification callback.

+ virtual void Observe(int type,

+ const NotificationSource& source,

+ const NotificationDetails& details);

- // Extract and return the offsets from |matches|.

- static std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches);

- // Replace the offsets in |matches| with those given in |offsets|, deleting

- // any which are npos, and return the updated list of matches.

- static TermMatches ReplaceOffsetsInTermMatches(

- const TermMatches& matches,

- const std::vector<size_t>& offsets);

private:

friend class AddHistoryMatch;

friend class InMemoryURLIndexTest;

@@ -194,29 +142,6 @@

// Creating one of me without a history path is not allowed (tests excepted).

InMemoryURLIndex();

- // Convenience types.

- typedef std::set<string16> String16Set;

- typedef std::set<char16> Char16Set;

- typedef std::vector<char16> Char16Vector;

- // An index into list of all of the words we have indexed.

- typedef int WordID;

- // A map allowing a WordID to be determined given a word.

- typedef std::map<string16, WordID> WordMap;

- // A map from character to word_ids.

- typedef std::set<WordID> WordIDSet; // An index into the WordList.

- typedef std::map<char16, WordIDSet> CharWordIDMap;

- // A map from word_id to history item.

- // TODO(mrossetti): URLID is 64 bit: a memory bloat and performance hit.

- // Consider using a smaller type.

- typedef URLID HistoryID;

- typedef std::set<HistoryID> HistoryIDSet;

- typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap;

// Support caching of term results so that we can optimize searches which

// build upon a previous search. Each entry in this map represents one

// search term from the most recent search. For example, if the user had

@@ -248,12 +173,6 @@

};

typedef std::map<string16, SearchTermCacheItem> SearchTermCacheMap;

- // TODO(rohitrao): Probably replace this with QueryResults.

- typedef std::vector<URLRow> URLRowVector;

- // A map from history_id to the history's URL and title.

- typedef std::map<HistoryID, URLRow> HistoryInfoMap;

// A helper class which performs the final filter on each candidate

// history URL match, inserting accepted matches into |scored_matches_|

// and trimming the maximum number of matches to 10.

@@ -280,35 +199,20 @@

// Initializes the whitelist of URL schemes.

static void InitializeSchemeWhitelist(std::set<std::string>* whitelist);

- // Breaks a string down into individual words.

- static String16Set WordSetFromString16(const string16& uni_string);

- // Given a set of Char16s, finds words containing those characters.

- WordIDSet WordIDSetForTermChars(const Char16Set& term_chars);

- // Creates a TermMatches which has an entry for each occurrence of |term|

- // in |string|, each marked with |term_num| so that the resulting TermMatches

- // can be merged with other TermMatches for other terms.

- static TermMatches MatchTermInString(const string16& term,

- const string16& string,

- int term_num);

// URL History indexing support functions.

// Indexes one URL history item.

bool IndexRow(const URLRow& row);

- // Breaks the |uni_word| string down into its individual characters.

- // Note that this is temporarily intended to work on a single word, but

- // _will_ work on a string of words, perhaps with unexpected results.

- // TODO(mrossetti): Lots of optimizations possible here for not restarting

- // a search if the user is just typing along. Also, change this to uniString

- // and properly handle substring matches, scoring and sorting the results

- // by score. Also, provide the metrics for where the matches occur so that

- // the UI can highlight the matched sections.

- static Char16Set Char16SetFromString16(const string16& uni_word);

+ // Parses and indexes the words in the URL and page title of |row|.

+ void AddRowWordsToIndex(const URLRow& row);

+ // Removes |row| and all associated words and characters from the index.

+ void RemoveRowFromIndex(const URLRow& row);

+ // Removes all words and characters associated with |row| from the index.

+ void RemoveRowWordsFromIndex(const URLRow& row);

// Given a single word in |uni_word|, adds a reference for the containing

// history item identified by |history_id| to the index.

void AddWordToIndex(const string16& uni_word, HistoryID history_id);

@@ -352,13 +256,14 @@

static int ScoreComponentForMatches(const TermMatches& matches,

size_t max_length);

- // Sorts and removes overlapping substring matches from |matches| and

- // returns the cleaned up matches.

- static TermMatches SortAndDeoverlap(const TermMatches& matches);

// Determines if |gurl| has a whitelisted scheme and returns true if so.

bool URLSchemeIsWhitelisted(const GURL& gurl) const;

+ // Notification handlers.

+ void OnURLVisited(const URLVisitedDetails* details);

+ void OnURLsModified(const URLsModifiedDetails* details);

+ void OnURLsDeleted(const URLsDeletedDetails* details);

// Utility functions supporting RestoreFromCache and SaveToCache.

// Construct a file path for the cache file within the same directory where

@@ -384,6 +289,8 @@

bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache);

bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache);

+ NotificationRegistrar registrar_;

// Directory where cache file resides. This is, except when unit testing,

// the same directory in which the profile's history database is found. It

// should never be empty.

@@ -395,22 +302,9 @@

// the InMemoryURLIndex was last populated.

base::Time last_saved_;

- // A list of all of indexed words. The index of a word in this list is the

- // ID of the word in the word_map_. It reduces the memory overhead by

- // replacing a potentially long and repeated string with a simple index.

- // NOTE: A word will _never_ be removed from this vector thus ensuring

- // the immutability of the word_id throughout the session, reducing

- // maintenance complexity.

- // TODO(mrossetti): Profile the vector allocation and determine if judicious

- // 'reserve' calls are called for.

- String16Vector word_list_;

+ // The index's durable private data.

+ scoped_ptr<URLIndexPrivateData> private_data_;

- int history_item_count_;

- WordMap word_map_;

- CharWordIDMap char_word_map_;

- WordIDHistoryMap word_id_history_map_;

- HistoryInfoMap history_info_map_;

// Cache of search terms.

SearchTermCacheMap search_term_cache_;

« no previous file with comments | « chrome/browser/history/in_memory_history_backend.cc ('k') | chrome/browser/history/in_memory_url_index.cc » ('j') | no next file with comments »