Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(23)

Unified Diff: chrome/browser/history/in_memory_url_index.h

Issue 8291005: HQP Refactoring (in Preparation for SQLite Cache) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Rattle those Bots Senseless Created 9 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « chrome/browser/history/in_memory_history_backend.cc ('k') | chrome/browser/history/in_memory_url_index.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/browser/history/in_memory_url_index.h
===================================================================
--- chrome/browser/history/in_memory_url_index.h (revision 105497)
+++ chrome/browser/history/in_memory_url_index.h (working copy)
@@ -21,16 +21,15 @@
#include "chrome/browser/autocomplete/autocomplete_match.h"
#include "chrome/browser/autocomplete/history_provider_util.h"
#include "chrome/browser/history/history_types.h"
+#include "chrome/browser/history/in_memory_url_index_types.h"
#include "chrome/browser/history/in_memory_url_index_cache.pb.h"
+#include "content/common/notification_observer.h"
+#include "content/common/notification_registrar.h"
#include "sql/connection.h"
#include "testing/gtest/include/gtest/gtest_prod.h"
class Profile;
-namespace base {
-class Time;
-}
-
namespace in_memory_url_index {
class InMemoryURLIndexCacheItem;
}
@@ -40,40 +39,10 @@
namespace imui = in_memory_url_index;
class URLDatabase;
+struct URLsDeletedDetails;
+struct URLsModifiedDetails;
+struct URLVisitedDetails;
-// Specifies where an omnibox term occurs within a string. Used for specifying
-// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in
-// scoring a result.
-struct TermMatch {
- TermMatch(int term_num, size_t offset, size_t length)
- : term_num(term_num),
- offset(offset),
- length(length) {}
-
- int term_num; // The index of the term in the original search string.
- size_t offset; // The starting offset of the substring match.
- size_t length; // The length of the substring match.
-};
-typedef std::vector<TermMatch> TermMatches;
-
-// Used for intermediate history result operations.
-struct ScoredHistoryMatch : public HistoryMatch {
- ScoredHistoryMatch(); // Required by STL.
- explicit ScoredHistoryMatch(const URLRow& url_info);
- ~ScoredHistoryMatch();
-
- static bool MatchScoreGreater(const ScoredHistoryMatch& m1,
- const ScoredHistoryMatch& m2);
-
- // An interim score taking into consideration location and completeness
- // of the match.
- int raw_score;
- TermMatches url_matches; // Term matches within the URL.
- TermMatches title_matches; // Term matches within the page title.
- bool can_inline; // True if this is a candidate for in-line autocompletion.
-};
-typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches;
-
// The URL history source.
// Holds portions of the URL database in memory in an indexed form. Used to
// quickly look up matching URLs for a given query string. Used by
@@ -93,17 +62,15 @@
// will eliminate such words except in the case where a single character
// is being searched on and which character occurs as the second char16 of a
// multi-char16 instance.
-class InMemoryURLIndex {
+class InMemoryURLIndex : public NotificationObserver {
public:
// |history_dir| is a path to the directory containing the history database
// within the profile wherein the cache and transaction journals will be
// stored.
- explicit InMemoryURLIndex(const FilePath& history_dir);
- ~InMemoryURLIndex();
+ explicit InMemoryURLIndex(Profile* profile,
+ const FilePath& history_dir);
+ virtual ~InMemoryURLIndex();
- // Convenience types
- typedef std::vector<string16> String16Vector;
-
// Opens and indexes the URL history database.
// |languages| gives a list of language encodings with which the history
// URLs and omnibox searches are interpreted, i.e. when each is broken
@@ -143,37 +110,18 @@
// Updates or adds a history item to the index if it meets the minimum
// 'quick' criteria.
- void UpdateURL(URLID row_id, const URLRow& row);
+ void UpdateURL(const URLRow& row);
// Deletes indexing data for a history item. The item may not have actually
// been indexed (which is the case if it did not previously meet minimum
// 'quick' criteria).
- void DeleteURL(URLID row_id);
+ void DeleteURL(const URLRow& row);
- // Breaks the |uni_string| string down into individual words and return
- // a vector with the individual words in their original order. If
- // |break_on_space| is false then the resulting list will contain only words
- // containing alpha-numeric characters. If |break_on_space| is true then the
- // resulting list will contain strings broken at whitespace.
- //
- // Example:
- // Given: |uni_string|: "http://www.google.com/ harry the rabbit."
- // With |break_on_space| false the returned list will contain:
- // "http", "www", "google", "com", "harry", "the", "rabbit"
- // With |break_on_space| true the returned list will contain:
- // "http://", "www.google.com/", "harry", "the", "rabbit."
- static String16Vector WordVectorFromString16(const string16& uni_string,
- bool break_on_space);
+ // Notification callback.
+ virtual void Observe(int type,
+ const NotificationSource& source,
+ const NotificationDetails& details);
- // Extract and return the offsets from |matches|.
- static std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches);
-
- // Replace the offsets in |matches| with those given in |offsets|, deleting
- // any which are npos, and return the updated list of matches.
- static TermMatches ReplaceOffsetsInTermMatches(
- const TermMatches& matches,
- const std::vector<size_t>& offsets);
-
private:
friend class AddHistoryMatch;
friend class InMemoryURLIndexTest;
@@ -194,29 +142,6 @@
// Creating one of me without a history path is not allowed (tests excepted).
InMemoryURLIndex();
- // Convenience types.
- typedef std::set<string16> String16Set;
- typedef std::set<char16> Char16Set;
- typedef std::vector<char16> Char16Vector;
-
- // An index into list of all of the words we have indexed.
- typedef int WordID;
-
- // A map allowing a WordID to be determined given a word.
- typedef std::map<string16, WordID> WordMap;
-
- // A map from character to word_ids.
- typedef std::set<WordID> WordIDSet; // An index into the WordList.
- typedef std::map<char16, WordIDSet> CharWordIDMap;
-
- // A map from word_id to history item.
- // TODO(mrossetti): URLID is 64 bit: a memory bloat and performance hit.
- // Consider using a smaller type.
- typedef URLID HistoryID;
- typedef std::set<HistoryID> HistoryIDSet;
- typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap;
-
-
// Support caching of term results so that we can optimize searches which
// build upon a previous search. Each entry in this map represents one
// search term from the most recent search. For example, if the user had
@@ -248,12 +173,6 @@
};
typedef std::map<string16, SearchTermCacheItem> SearchTermCacheMap;
- // TODO(rohitrao): Probably replace this with QueryResults.
- typedef std::vector<URLRow> URLRowVector;
-
- // A map from history_id to the history's URL and title.
- typedef std::map<HistoryID, URLRow> HistoryInfoMap;
-
// A helper class which performs the final filter on each candidate
// history URL match, inserting accepted matches into |scored_matches_|
// and trimming the maximum number of matches to 10.
@@ -280,35 +199,20 @@
// Initializes the whitelist of URL schemes.
static void InitializeSchemeWhitelist(std::set<std::string>* whitelist);
- // Breaks a string down into individual words.
- static String16Set WordSetFromString16(const string16& uni_string);
-
- // Given a set of Char16s, finds words containing those characters.
- WordIDSet WordIDSetForTermChars(const Char16Set& term_chars);
-
- // Creates a TermMatches which has an entry for each occurrence of the string
- // |term| found in the string |string|. Mark each match with |term_num| so
- // that the resulting TermMatches can be merged with other TermMatches for
- // other terms.
- static TermMatches MatchTermInString(const string16& term,
- const string16& string,
- int term_num);
-
// URL History indexing support functions.
// Indexes one URL history item.
bool IndexRow(const URLRow& row);
- // Breaks the |uni_word| string down into its individual characters.
- // Note that this is temporarily intended to work on a single word, but
- // _will_ work on a string of words, perhaps with unexpected results.
- // TODO(mrossetti): Lots of optimizations possible here for not restarting
- // a search if the user is just typing along. Also, change this to uniString
- // and properly handle substring matches, scoring and sorting the results
- // by score. Also, provide the metrics for where the matches occur so that
- // the UI can highlight the matched sections.
- static Char16Set Char16SetFromString16(const string16& uni_word);
+ // Parses and indexes the words in the URL and page title of |row|.
+ void AddRowWordsToIndex(const URLRow& row);
+ // Removes |row| and all associated words and characters from the index.
+ void RemoveRowFromIndex(const URLRow& row);
+
+ // Removes all words and characters associated with |row| from the index.
+ void RemoveRowWordsFromIndex(const URLRow& row);
+
// Given a single word in |uni_word|, adds a reference for the containing
// history item identified by |history_id| to the index.
void AddWordToIndex(const string16& uni_word, HistoryID history_id);
@@ -352,13 +256,14 @@
static int ScoreComponentForMatches(const TermMatches& matches,
size_t max_length);
- // Sorts and removes overlapping substring matches from |matches| and
- // returns the cleaned up matches.
- static TermMatches SortAndDeoverlap(const TermMatches& matches);
-
// Determines if |gurl| has a whitelisted scheme and returns true if so.
bool URLSchemeIsWhitelisted(const GURL& gurl) const;
+ // Notification handlers.
+ void OnURLVisited(const URLVisitedDetails* details);
+ void OnURLsModified(const URLsModifiedDetails* details);
+ void OnURLsDeleted(const URLsDeletedDetails* details);
+
// Utility functions supporting RestoreFromCache and SaveToCache.
// Construct a file path for the cache file within the same directory where
@@ -384,6 +289,8 @@
bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache);
bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache);
+ NotificationRegistrar registrar_;
+
// Directory where cache file resides. This is, except when unit testing,
// the same directory in which the profile's history database is found. It
// should never be empty.
@@ -395,22 +302,9 @@
// the InMemoryURLIndex was last populated.
base::Time last_saved_;
- // A list of all of indexed words. The index of a word in this list is the
- // ID of the word in the word_map_. It reduces the memory overhead by
- // replacing a potentially long and repeated string with a simple index.
- // NOTE: A word will _never_ be removed from this vector thus insuring
- // the immutability of the word_id throughout the session, reducing
- // maintenance complexity.
- // TODO(mrossetti): Profile the vector allocation and determine if judicious
- // 'reserve' calls are called for.
- String16Vector word_list_;
+ // The index's durable private data.
+ scoped_ptr<URLIndexPrivateData> private_data_;
- int history_item_count_;
- WordMap word_map_;
- CharWordIDMap char_word_map_;
- WordIDHistoryMap word_id_history_map_;
- HistoryInfoMap history_info_map_;
-
// Cache of search terms.
SearchTermCacheMap search_term_cache_;
« no previous file with comments | « chrome/browser/history/in_memory_history_backend.cc ('k') | chrome/browser/history/in_memory_url_index.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698