chrome/browser/history/in_memory_url_index.h - Issue 3138006: Next step integrating the HistoryQuickProvider: Implement index initializatio...

Unified Diff: chrome/browser/history/in_memory_url_index.h

Issue 3138006: Next step integrating the HistoryQuickProvider: Implement index initializatio... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: '' Created 10 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/browser/history/in_memory_url_index.h

===================================================================

--- chrome/browser/history/in_memory_url_index.h (revision 56956)

+++ chrome/browser/history/in_memory_url_index.h (working copy)

@@ -6,6 +6,22 @@

#define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_

#pragma once

+#include <map>

+#include <set>

+#include <vector>

+#include "app/sql/connection.h"

+#include "base/basictypes.h"

+#include "base/linked_ptr.h"

+#include "base/scoped_ptr.h"

+#include "base/string16.h"

+#include "chrome/browser/history/history_types.h"

+#include "testing/gtest/include/gtest/gtest_prod.h"

+namespace base {

+class Time;

namespace history {

class URLDatabase;

@@ -15,13 +31,135 @@

// quickly look up matching URLs for a given query string. Used by

// the HistoryURLProvider for inline autocomplete and to provide URL

// matches to the omnibox.

+//

+// Note about multi-byte codepoints and the data structures in the

+// InMemoryURLIndex class: One will quickly notice that no effort is made to

+// ensure that multi-byte character boundaries are detected when indexing the

+// words and characters in the URL history database except when converting

+// URL strings to lowercase. Multi-byte-edness makes no difference when

+// indexing or when searching the index as the final filtering of results

+// is dependent on the comparison of a string of bytes, not individual

+// characters. While the lookup of those bytes during a search in the

+// |char_word_map_| could serve up words in which the individual char16

+// occurs as a portion of a composite character, the next filtering step

+// will eliminate such words except in the case where a single character

+// is being searched on and that character occurs as the second char16 of a

+// multi-char16 instance.

class InMemoryURLIndex {

public:

- InMemoryURLIndex() {}

- ~InMemoryURLIndex() {}

+ InMemoryURLIndex();

+ ~InMemoryURLIndex();

+ // Convenience types

+ typedef std::vector<string16> String16Vector;

// Open and index the URL history database.

- bool Init(URLDatabase* history_db);

+ bool Init(URLDatabase* history_db, const string16& languages);

+ // Reset the history index.

+ void Reset();

+ // Given a vector containing one or more words as string16s, scan the

+ // history index and return a vector with all scored, matching history items.

+ // Each term must occur somewhere in the history item for the item to

+ // qualify; however, the terms do not necessarily have to be adjacent.

+ HistoryMatches HistoryItemsForTerms(const String16Vector& terms);

+ // Returns the date threshold for considering a history item as significant.

+ static base::Time RecentThreshold();

+ private:

+ friend class InMemoryURLIndexTest;

+ FRIEND_TEST(InMemoryURLIndexTest, Initialization);

+ // Convenience types

+ typedef std::set<string16> String16Set;

+ typedef std::set<char16> Char16Set;

+ // An index into the list of all of the words we have indexed.

+ typedef int16 WordID;

+ // A map allowing a WordID to be determined given a word.

+ typedef std::map<string16, WordID> WordMap;

+ // A map from character to word_ids.

+ typedef std::set<WordID> WordIDSet; // A set of indexes into the word list.

+ typedef std::map<char16, WordIDSet> CharWordIDMap;

+ // A map from word_id to history item.

+ // TODO(mrossetti): URLID is 64 bit: a memory bloat and performance hit.

+ // Consider using a smaller type.

+ typedef URLID HistoryID;

+ typedef std::set<HistoryID> HistoryIDSet;

+ typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap;

+ // Support caching of term character intersections so that we can optimize

+ // searches which build upon a previous search.

+ struct TermCharWordSet {

+ TermCharWordSet(Char16Set char_set, WordIDSet word_id_set, bool used)

+ : char_set_(char_set),

+ word_id_set_(word_id_set),

+ used_(used) {}

+ Char16Set char_set_;

+ WordIDSet word_id_set_;

+ bool used_; // true if this set has been used for the current term search.

+ };

+ typedef std::vector<TermCharWordSet> TermCharWordSetVector;

+ // TODO(rohitrao): Probably replace this with QueryResults.

+ typedef std::vector<URLRow> URLRowVector;

+ // A map from history_id to the history's URL and title.

+ typedef std::map<HistoryID, URLRow> HistoryInfoMap;

+ // Break a string down into individual words.

+ String16Set WordsFromString16(const string16& uni_string);

+ // URL History indexing support functions.

+ // Index one URL history item.

+ bool IndexRow(URLRow row);

+ // Break a string down into its individual characters.

+ // Note that this is temporarily intended to work on a single word, but

+ // _will_ work on a string of words, perhaps with unexpected results.

+ // TODO(mrossetti): Lots of optimizations possible here for not restarting

+ // a search if the user is just typing along. Also, change this to uniString

+ // and properly handle substring matches, scoring and sorting the results

+ // by score. Also, provide the metrics for where the matches occur so that

+ // the UI can highlight the matched sections.

+ Char16Set CharactersFromString16(const string16& uni_word);

+ // Given a single word, add a reference to the containing history item

+ // to the index.

+ void AddWordToIndex(const string16& uni_word, HistoryID history_id);

+ // Update an existing entry in the word/history index by adding the

+ // |history_id| to the set for |word_id| in the word_id_history_map_.

+ void UpdateWordHistory(WordID word_id, HistoryID history_id);

+ // Create a new entry in the word/history map for |word_id| and add

+ // |history_id| as the initial element of the word's set.

+ void AddWordHistory(const string16& uni_word, HistoryID history_id);

+ // A list of all of the indexed words. The index of a word in this list is the

+ // ID of the word in the word_map_. It reduces the memory overhead by

+ // replacing a potentially long and repeated string with a simple index.

+ // NOTE: A word will _never_ be removed from this vector, thus ensuring

+ // the immutability of the word_id throughout the session, reducing

+ // maintenance complexity.

+ String16Vector word_list_;

+ uint64 history_item_count_;

+ WordMap word_map_;

+ CharWordIDMap char_word_map_;

+ WordIDHistoryMap word_id_history_map_;

+ TermCharWordSetVector term_char_word_set_cache_;

+ HistoryInfoMap history_info_map_;

+ string16 languages_;

+ DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex);

};

} // namespace history

« no previous file with comments | « chrome/browser/history/in_memory_history_backend.cc ('k') | chrome/browser/history/in_memory_url_index.cc » ('j') | no next file with comments »