Chromium Code Reviews| Index: chrome/browser/history/in_memory_url_index_types.h |
| =================================================================== |
| --- chrome/browser/history/in_memory_url_index_types.h (revision 0) |
| +++ chrome/browser/history/in_memory_url_index_types.h (revision 0) |
| @@ -0,0 +1,206 @@ |
| +// Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ |
| +#define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ |
| +#pragma once |
| + |
| +#include <map> |
| +#include <set> |
| +#include <string> |
| +#include <vector> |
| + |
| +#include "base/basictypes.h" |
| +#include "base/gtest_prod_util.h" |
| +#include "base/string16.h" |
| +#include "chrome/browser/autocomplete/history_provider_util.h" |
| +#include "chrome/browser/history/history_types.h" |
| + |
| +namespace history { |
| + |
| +// Matches within URL and Title Strings ---------------------------------------- |
| + |
| +// Specifies where an omnibox term occurs within a string. Used for specifying |
| +// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in |
| +// scoring a result. |
| +struct TermMatch { |
| + TermMatch() : term_num(0), offset(0), length(0) {} |
| + TermMatch(int term_num, size_t offset, size_t length) |
| + : term_num(term_num), offset(offset), length(length) {} |
|
Peter Kasting
2011/10/24 22:01:40
Nit: My reading is that each member needs its own
mrossetti
2011/10/25 16:15:05
Done.
|
| + |
| + int term_num; // The index of the term in the original search string. |
| + size_t offset; // The starting offset of the substring match. |
| + size_t length; // The length of the substring match. |
| +}; |
| +typedef std::vector<TermMatch> TermMatches; |
| + |
| +// Returns a TermMatches which has an entry for each occurrence of the string |
| +// |term| found in the string |string|. Mark each match with |term_num| so |
| +// that the resulting TermMatches can be merged with other TermMatches for |
| +// other terms. Note that only the first 2,048 characters of |string| are |
| +// considered during the match operation. |
| +TermMatches MatchTermInString(const string16& term, |
| + const string16& string, |
| + int term_num); |
| + |
| +// Sorts and removes overlapping substring matches from |matches| and |
| +// returns the cleaned up matches. |
| +TermMatches SortAndDeoverlapMatches(const TermMatches& matches); |
| + |
| +// Extracts and returns the offsets from |matches|. |
| +std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches); |
| + |
| +// Replaces the offsets in |matches| with those given in |offsets|, deleting |
| +// any which are npos, and returns the updated list of matches. |
| +TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches, |
| + const std::vector<size_t>& offsets); |
| + |
| +// Used for intermediate history result operations ----------------------------- |
| + |
| +struct ScoredHistoryMatch : public history::HistoryMatch { |
| + ScoredHistoryMatch(); // Required by STL. |
| + explicit ScoredHistoryMatch(const history::URLRow& url_info); |
| + ~ScoredHistoryMatch(); |
| + |
| + static bool MatchScoreGreater(const ScoredHistoryMatch& m1, |
| + const ScoredHistoryMatch& m2); |
| + |
| + // An interim score taking into consideration location and completeness |
| + // of the match. |
| + int raw_score; |
| + TermMatches url_matches; // Term matches within the URL. |
| + TermMatches title_matches; // Term matches within the page title. |
| + bool can_inline; // True if this is a candidate for in-line autocompletion. |
| +}; |
| +typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; |
| + |
| +// Convenience Types ----------------------------------------------------------- |
| + |
| +typedef std::vector<string16> String16Vector; |
| +typedef std::set<string16> String16Set; |
| +typedef std::set<char16> Char16Set; |
| +typedef std::vector<char16> Char16Vector; |
| + |
| +// Utility Functions Relates to Convenience Types ------------------------------ |
| + |
| +// Breaks a string down into individual words. |
| +String16Set String16SetFromString16(const string16& uni_string); |
| + |
| +// Breaks the |uni_string| string down into individual words and return |
| +// a vector with the individual words in their original order. If |
| +// |break_on_space| is false then the resulting list will contain only words |
| +// containing alpha-numeric characters. If |break_on_space| is true then the |
| +// resulting list will contain strings broken at whitespace. (|break_on_space| |
| +// indicates that the BreakIterator::BREAK_SPACE (equivalent to BREAK_LINE) |
| +// approach is to be used. For a complete description of this algorithm |
| +// refer to the comments in base/i18n/break_iterator.h.) |
| +// |
| +// Example: |
| +// Given: |uni_string|: "http://www.google.com/ harry the rabbit." |
| +// With |break_on_space| false the returned list will contain: |
| +// "http", "www", "google", "com", "harry", "the", "rabbit" |
| +// With |break_on_space| true the returned list will contain: |
| +// "http://", "www.google.com/", "harry", "the", "rabbit." |
| +String16Vector String16VectorFromString16(const string16& uni_string, |
| + bool break_on_space); |
| + |
| +// Breaks the |uni_word| string down into its individual characters. |
| +// Note that this is temporarily intended to work on a single word, but |
| +// _will_ work on a string of words, perhaps with unexpected results. |
| +// TODO(mrossetti): Lots of optimizations possible here for not restarting |
| +// a search if the user is just typing along. Also, change this to uniString |
| +// and properly handle substring matches, scoring and sorting the results |
| +// by score. Also, provide the metrics for where the matches occur so that |
| +// the UI can highlight the matched sections. |
| +Char16Set Char16SetFromString16(const string16& uni_word); |
| + |
| +// Support for InMemoryURLIndex Private Data ----------------------------------- |
| + |
| +// An index into a list of all of the words we have indexed. |
| +typedef size_t WordID; |
| + |
| +// A map allowing a WordID to be determined given a word. |
| +typedef std::map<string16, WordID> WordMap; |
| + |
| +// A map from character to the word_ids of words containing that character. |
| +typedef std::set<WordID> WordIDSet; // An index into the WordList. |
| +typedef std::map<char16, WordIDSet> CharWordIDMap; |
| + |
| +// A map from word (by word_id) to history items containing that word. |
| +typedef history::URLID HistoryID; |
| +typedef std::set<HistoryID> HistoryIDSet; |
| +typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap; |
| +typedef std::map<HistoryID, WordIDSet> HistoryIDWordMap; |
| + |
| +// A map from history_id to the history's URL and title. |
| +typedef std::map<HistoryID, URLRow> HistoryInfoMap; |
| + |
| +// TODO(rohitrao): Probably replace this with QueryResults. |
| +typedef std::vector<history::URLRow> URLRowVector; |
| + |
| +// A structure describing the index's internal data. This structure is used |
| +// by the InMemoryURLIndex, InMemoryURLIndexBackend, and |
| +// InMemoryURLCacheDatabase classes. |
| +class URLIndexPrivateData { |
| + public: |
| + URLIndexPrivateData(); |
| + ~URLIndexPrivateData(); |
| + |
| + // Clear all of our data. |
| + void Clear(); |
| + |
| + // Adds |word_id| to |history_id|'s entry in the history/word map, |
| + // creating a new entry if one does not already exist. |
| + void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); |
| + |
| + // Given a set of Char16s, finds words containing those characters. |
| + WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
| + |
| + private: |
| + friend class InMemoryURLIndex; |
| + friend class InMemoryURLIndexTest; |
| + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
| + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
| + FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); |
| + |
| + // A list of all of indexed words. The index of a word in this list is the |
| + // ID of the word in the word_map_. It reduces the memory overhead by |
| + // replacing a potentially long and repeated string with a simple index. |
| + String16Vector word_list_; |
| + |
| + // A list of available words slots in |word_list_|. An available word slot |
| + // is the index of a unused word in word_list_ vector, also referred to as |
| + // a WordID. As URL visits are added or modified new words may be added to |
| + // the index, in which case any available words are used, if any, and then |
| + // words are added to the end of the word_list_. When URL visits are |
| + // modified or deleted old words may be removed from the index, in which |
| + // case the slots for those words are added to available_words_ for resuse |
| + // by future URL updates. |
| + WordIDSet available_words_; |
| + |
| + // A one-to-one mapping from the a word string to its slot number (i.e. |
| + // WordID) in the |word_list_|. |
| + WordMap word_map_; |
| + |
| + // A one-to-many mapping from a single character to all WordIDs of words |
| + // containing that character. |
| + CharWordIDMap char_word_map_; |
| + |
| + // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as |
| + // used in the history database) of history items in which the word occurs. |
| + WordIDHistoryMap word_id_history_map_; |
| + |
| + // A one-to-many mapping from a HistoryID to all WordIDs of words that occur |
| + // in the URL and/or page title of the history item referenced by that |
| + // HistoryID. |
| + HistoryIDWordMap history_id_word_map_; |
| + |
| + // A one-to-one mapping from HistoryID to the history item data governing |
| + // index inclusion and relevance scoring. |
| + HistoryInfoMap history_info_map_; |
| +}; |
| + |
| +} // namespace history |
| + |
| +#endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ |
| Property changes on: chrome/browser/history/in_memory_url_index_types.h |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| + LF |