Index: chrome/browser/history/in_memory_url_index.h |
=================================================================== |
--- chrome/browser/history/in_memory_url_index.h (revision 56967) |
+++ chrome/browser/history/in_memory_url_index.h (working copy) |
@@ -6,22 +6,6 @@ |
#define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
#pragma once |
-#include <map> |
-#include <set> |
-#include <vector> |
- |
-#include "app/sql/connection.h" |
-#include "base/basictypes.h" |
-#include "base/linked_ptr.h" |
-#include "base/scoped_ptr.h" |
-#include "base/string16.h" |
-#include "chrome/browser/history/history_types.h" |
-#include "testing/gtest/include/gtest/gtest_prod.h" |
- |
-namespace base { |
-class Time; |
-} |
- |
namespace history { |
class URLDatabase; |
@@ -31,135 +15,13 @@ |
// quickly look up matching URLs for a given query string. Used by |
// the HistoryURLProvider for inline autocomplete and to provide URL |
// matches to the omnibox. |
-// |
-// Note about multi-byte codepoints and the data structures in the |
-// InMemoryURLIndex class: One will quickly notice that no effort is made to |
-// insure that multi-byte character boundaries are detected when indexing the |
-// words and characters in the URL history database except when converting |
-// URL strings to lowercase. Multi-byte-edness makes no difference when |
-// indexing or when searching the index as the final filtering of results |
-// is dependent on the comparison of a string of bytes, not individual |
-// characters. While the lookup of those bytes during a search in the |
-// |char_word_map_| could serve up words in which the individual char16 |
-// occurs as a portion of a composite character the next filtering step |
-// will eliminate such words except in the case where a single character |
-// is being searched on and which character occurs as the second char16 of a |
-// multi-char16 instance. |
class InMemoryURLIndex { |
public: |
- InMemoryURLIndex(); |
- ~InMemoryURLIndex(); |
+ InMemoryURLIndex() {} |
+ ~InMemoryURLIndex() {} |
- // Convenience types |
- typedef std::vector<string16> String16Vector; |
- |
// Open and index the URL history database. |
- bool Init(URLDatabase* history_db, const string16& languages); |
- |
- // Reset the history index. |
- void Reset(); |
- |
- // Given a vector containing one or more words as string16s, scan the |
- // history index and return a vector with all scored, matching history items. |
- // Each term must occur somewhere in the history item for the item to |
- // qualify; however, the terms do not necessarily have to be adjacent. |
- HistoryMatches HistoryItemsForTerms(const String16Vector& terms); |
- |
- // Returns the date threshold for considering an history item as significant. |
- static base::Time RecentThreshold(); |
- |
- private: |
- friend class InMemoryURLIndexTest; |
- FRIEND_TEST(InMemoryURLIndexTest, Initialization); |
- |
- // Convenience types |
- typedef std::set<string16> String16Set; |
- typedef std::set<char16> Char16Set; |
- |
- // An index into list of all of the words we have indexed. |
- typedef int16 WordID; |
- |
- // A map allowing a WordID to be determined given a word. |
- typedef std::map<string16, WordID> WordMap; |
- |
- // A map from character to word_ids. |
- typedef std::set<WordID> WordIDSet; // An index into the WordList. |
- typedef std::map<char16, WordIDSet> CharWordIDMap; |
- |
- // A map from word_id to history item. |
- // TODO(mrossetti): URLID is 64 bit: a memory bloat and performance hit. |
- // Consider using a smaller type. |
- typedef URLID HistoryID; |
- typedef std::set<HistoryID> HistoryIDSet; |
- typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap; |
- |
- // Support caching of term character intersections so that we can optimize |
- // searches which build upon a previous search. |
- struct TermCharWordSet { |
- TermCharWordSet(Char16Set char_set, WordIDSet word_id_set, bool used) |
- : char_set_(char_set), |
- word_id_set_(word_id_set), |
- used_(used) {} |
- |
- Char16Set char_set_; |
- WordIDSet word_id_set_; |
- bool used_; // true if this set has been used for the current term search. |
- }; |
- typedef std::vector<TermCharWordSet> TermCharWordSetVector; |
- |
- // TODO(rohitrao): Probably replace this with QueryResults. |
- typedef std::vector<URLRow> URLRowVector; |
- |
- // A map from history_id to the history's URL and title. |
- typedef std::map<HistoryID, URLRow> HistoryInfoMap; |
- |
- // Break a string down into individual words. |
- String16Set WordsFromString16(const string16& uni_string); |
- |
- // URL History indexing support functions. |
- |
- // Index one URL history item. |
- bool IndexRow(URLRow row); |
- |
- // Break a string down into its individual characters. |
- // Note that this is temporarily intended to work on a single word, but |
- // _will_ work on a string of words, perhaps with unexpected results. |
- // TODO(mrossetti): Lots of optimizations possible here for not restarting |
- // a search if the user is just typing along. Also, change this to uniString |
- // and properly handle substring matches, scoring and sorting the results |
- // by score. Also, provide the metrics for where the matches occur so that |
- // the UI can highlight the matched sections. |
- Char16Set CharactersFromString16(const string16& uni_word); |
- |
- // Given a single word, add a reference to the containing history item |
- // to the index. |
- void AddWordToIndex(const string16& uni_word, HistoryID history_id); |
- |
- // Update an existing entry in the word/history index by adding the |
- // |history_id| to set for |word_id| in the word_id_history_map_. |
- void UpdateWordHistory(WordID word_id, HistoryID history_id); |
- |
- // Create a new entry in the word/history map for |word_id| and add |
- // |history_id| as the initial element of the word's set. |
- void AddWordHistory(const string16& uni_word, HistoryID history_id); |
- |
- // A list of all of indexed words. The index of a word in this list is the |
- // ID of the word in the word_map_. It reduces the memory overhead by |
- // replacing a potentially long and repeated string with a simple index. |
- // NOTE: A word will _never_ be removed from this vector thus insuring |
- // the immutability of the word_id throughout the session, reducing |
- // maintenance complexity. |
- String16Vector word_list_; |
- |
- uint64 history_item_count_; |
- WordMap word_map_; |
- CharWordIDMap char_word_map_; |
- WordIDHistoryMap word_id_history_map_; |
- TermCharWordSetVector term_char_word_set_cache_; |
- HistoryInfoMap history_info_map_; |
- string16 languages_; |
- |
- DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); |
+ bool Init(URLDatabase* history_db); |
}; |
} // namespace history |