Index: chrome/browser/history/in_memory_url_index_types.h |
=================================================================== |
--- chrome/browser/history/in_memory_url_index_types.h (revision 0) |
+++ chrome/browser/history/in_memory_url_index_types.h (revision 0) |
@@ -0,0 +1,206 @@ |
+// Copyright (c) 2011 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ |
+#define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ |
+#pragma once |
+ |
+#include <map> |
+#include <set> |
+#include <string> |
+#include <vector> |
+ |
+#include "base/basictypes.h" |
+#include "base/gtest_prod_util.h" |
+#include "base/string16.h" |
+#include "chrome/browser/autocomplete/history_provider_util.h" |
+#include "chrome/browser/history/history_types.h" |
+ |
+namespace history { |
+ |
+// Matches within URL and Title Strings ---------------------------------------- |
+ |
+// Specifies where an omnibox term occurs within a string. Used for specifying |
+// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in |
+// scoring a result. |
+struct TermMatch { |
+ TermMatch() : term_num(0), offset(0), length(0) {} |
+ TermMatch(int term_num, size_t offset, size_t length) |
+ : term_num(term_num), offset(offset), length(length) {} |
+ |
+ int term_num; // The index of the term in the original search string. |
+ size_t offset; // The starting offset of the substring match. |
+ size_t length; // The length of the substring match. |
+}; |
+typedef std::vector<TermMatch> TermMatches; |
+ |
+// Returns a TermMatches which has an entry for each occurrence of the string |
+// |term| found in the string |string|. Mark each match with |term_num| so |
+// that the resulting TermMatches can be merged with other TermMatches for |
+// other terms. Note that only the first 2,048 characters of |string| are |
+// considered during the match operation. |
+TermMatches MatchTermInString(const string16& term, |
+ const string16& string, |
+ int term_num); |
+ |
+// Sorts and removes overlapping substring matches from |matches| and |
+// returns the cleaned up matches. |
+TermMatches SortAndDeoverlapMatches(const TermMatches& matches); |
+ |
+// Extracts and returns the offsets from |matches|. |
+std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches); |
+ |
+// Replaces the offsets in |matches| with those given in |offsets|, deleting |
+// any which are npos, and returns the updated list of matches. |
+TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches, |
+ const std::vector<size_t>& offsets); |
+ |
+// Used for intermediate history result operations ----------------------------- |
+ |
+struct ScoredHistoryMatch : public history::HistoryMatch { |
+ ScoredHistoryMatch(); // Required by STL. |
+ explicit ScoredHistoryMatch(const history::URLRow& url_info); |
+ ~ScoredHistoryMatch(); |
+ |
+ static bool MatchScoreGreater(const ScoredHistoryMatch& m1, |
+ const ScoredHistoryMatch& m2); |
+ |
+ // An interim score taking into consideration location and completeness |
+ // of the match. |
+ int raw_score; |
+ TermMatches url_matches; // Term matches within the URL. |
+ TermMatches title_matches; // Term matches within the page title. |
+ bool can_inline; // True if this is a candidate for in-line autocompletion. |
+}; |
+typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; |
+ |
+// Convenience Types ----------------------------------------------------------- |
+ |
+typedef std::vector<string16> String16Vector; |
+typedef std::set<string16> String16Set; |
+typedef std::set<char16> Char16Set; |
+typedef std::vector<char16> Char16Vector; |
+ |
+// Utility Functions Relates to Convenience Types ------------------------------ |
+ |
+// Breaks a string down into individual words. |
+String16Set String16SetFromString16(const string16& uni_string); |
+ |
+// Breaks the |uni_string| string down into individual words and return |
+// a vector with the individual words in their original order. If |
+// |break_on_space| is false then the resulting list will contain only words |
+// containing alpha-numeric characters. If |break_on_space| is true then the |
+// resulting list will contain strings broken at whitespace. (|break_on_space| |
+// indicates that the BreakIterator::BREAK_SPACE (equivalent to BREAK_LINE) |
+// approach is to be used. For a complete description of this algorithm |
+// refer to the comments in base/i18n/break_iterator.h.) |
+// |
+// Example: |
+// Given: |uni_string|: "http://www.google.com/ harry the rabbit." |
+// With |break_on_space| false the returned list will contain: |
+// "http", "www", "google", "com", "harry", "the", "rabbit" |
+// With |break_on_space| true the returned list will contain: |
+// "http://", "www.google.com/", "harry", "the", "rabbit." |
+String16Vector String16VectorFromString16(const string16& uni_string, |
+ bool break_on_space); |
+ |
+// Breaks the |uni_word| string down into its individual characters. |
+// Note that this is temporarily intended to work on a single word, but |
+// _will_ work on a string of words, perhaps with unexpected results. |
+// TODO(mrossetti): Lots of optimizations possible here for not restarting |
+// a search if the user is just typing along. Also, change this to uniString |
+// and properly handle substring matches, scoring and sorting the results |
+// by score. Also, provide the metrics for where the matches occur so that |
+// the UI can highlight the matched sections. |
+Char16Set Char16SetFromString16(const string16& uni_word); |
+ |
+// Support for InMemoryURLIndex Private Data ----------------------------------- |
+ |
+// An index into a list of all of the words we have indexed. |
+typedef size_t WordID; |
+ |
+// A map allowing a WordID to be determined given a word. |
+typedef std::map<string16, WordID> WordMap; |
+ |
+// A map from character to the word_ids of words containing that character. |
+typedef std::set<WordID> WordIDSet; // An index into the WordList. |
+typedef std::map<char16, WordIDSet> CharWordIDMap; |
+ |
+// A map from word (by word_id) to history items containing that word. |
+typedef history::URLID HistoryID; |
+typedef std::set<HistoryID> HistoryIDSet; |
+typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap; |
+typedef std::map<HistoryID, WordIDSet> HistoryIDWordMap; |
+ |
+// A map from history_id to the history's URL and title. |
+typedef std::map<HistoryID, URLRow> HistoryInfoMap; |
+ |
+// TODO(rohitrao): Probably replace this with QueryResults. |
+typedef std::vector<history::URLRow> URLRowVector; |
+ |
+// A structure describing the index's internal data. This structure is used |
+// by the InMemoryURLIndex, InMemoryURLIndexBackend, and |
+// InMemoryURLCacheDatabase classes. |
+class URLIndexPrivateData { |
+ public: |
+ URLIndexPrivateData(); |
+ ~URLIndexPrivateData(); |
+ |
+ // Clear all of our data. |
+ void Clear(); |
+ |
+ // Adds |word_id| to |history_id|'s entry in the history/word map, |
+ // creating a new entry if one does not already exist. |
+ void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); |
+ |
+ // Given a set of Char16s, finds words containing those characters. |
+ WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
+ |
+ private: |
+ friend class InMemoryURLIndex; |
+ friend class InMemoryURLIndexTest; |
+ FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
+ FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
+ FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); |
+ |
+ // A list of all of indexed words. The index of a word in this list is the |
+ // ID of the word in the word_map_. It reduces the memory overhead by |
+ // replacing a potentially long and repeated string with a simple index. |
+ String16Vector word_list_; |
+ |
+ // A list of available words slots in |word_list_|. An available word slot |
+ // is the index of a unused word in word_list_ vector, also referred to as |
+ // a WordID. As URL visits are added or modified new words may be added to |
+ // the index, in which case any available words are used, if any, and then |
+ // words are added to the end of the word_list_. When URL visits are |
+ // modified or deleted old words may be removed from the index, in which |
+ // case the slots for those words are added to available_words_ for resuse |
+ // by future URL updates. |
+ WordIDSet available_words_; |
+ |
+ // A one-to-one mapping from the a word string to its slot number (i.e. |
+ // WordID) in the |word_list_|. |
+ WordMap word_map_; |
+ |
+ // A one-to-many mapping from a single character to all WordIDs of words |
+ // containing that character. |
+ CharWordIDMap char_word_map_; |
+ |
+ // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as |
+ // used in the history database) of history items in which the word occurs. |
+ WordIDHistoryMap word_id_history_map_; |
+ |
+ // A one-to-many mapping from a HistoryID to all WordIDs of words that occur |
+ // in the URL and/or page title of the history item referenced by that |
+ // HistoryID. |
+ HistoryIDWordMap history_id_word_map_; |
+ |
+ // A one-to-one mapping from HistoryID to the history item data governing |
+ // index inclusion and relevance scoring. |
+ HistoryInfoMap history_info_map_; |
+}; |
+ |
+} // namespace history |
+ |
+#endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ |
Property changes on: chrome/browser/history/in_memory_url_index_types.h |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |