Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(781)

Unified Diff: chrome/browser/history/in_memory_url_index_types.h

Issue 8291005: HQP Refactoring (in Preparation for SQLite Cache) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Rattle those Bots Senseless Created 9 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « chrome/browser/history/in_memory_url_index.cc ('k') | chrome/browser/history/in_memory_url_index_types.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/browser/history/in_memory_url_index_types.h
===================================================================
--- chrome/browser/history/in_memory_url_index_types.h (revision 0)
+++ chrome/browser/history/in_memory_url_index_types.h (revision 0)
@@ -0,0 +1,206 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_
+#define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_
+#pragma once
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/gtest_prod_util.h"
+#include "base/string16.h"
+#include "chrome/browser/autocomplete/history_provider_util.h"
+#include "chrome/browser/history/history_types.h"
+
+namespace history {
+
+// Matches within URL and Title Strings ----------------------------------------
+
+// Specifies where an omnibox term occurs within a string. Used for specifying
+// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in
+// scoring a result.
+struct TermMatch {
+ TermMatch() : term_num(0), offset(0), length(0) {}
+ TermMatch(int term_num, size_t offset, size_t length)
+ : term_num(term_num), offset(offset), length(length) {}
+
+ int term_num; // The index of the term in the original search string.
+ size_t offset; // The starting offset of the substring match.
+ size_t length; // The length of the substring match.
+};
+typedef std::vector<TermMatch> TermMatches;
+
+// Returns a TermMatches which has an entry for each occurrence of the string
+// |term| found in the string |string|. Mark each match with |term_num| so
+// that the resulting TermMatches can be merged with other TermMatches for
+// other terms. Note that only the first 2,048 characters of |string| are
+// considered during the match operation.
+TermMatches MatchTermInString(const string16& term,
+ const string16& string,
+ int term_num);
+
+// Sorts and removes overlapping substring matches from |matches| and
+// returns the cleaned up matches.
+TermMatches SortAndDeoverlapMatches(const TermMatches& matches);
+
+// Extracts and returns the offsets from |matches|.
+std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches);
+
+// Replaces the offsets in |matches| with those given in |offsets|, deleting
+// any which are npos, and returns the updated list of matches.
+TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches,
+ const std::vector<size_t>& offsets);
+
+// Used for intermediate history result operations -----------------------------
+
+struct ScoredHistoryMatch : public history::HistoryMatch {
+ ScoredHistoryMatch(); // Required by STL.
+ explicit ScoredHistoryMatch(const history::URLRow& url_info);
+ ~ScoredHistoryMatch();
+
+ static bool MatchScoreGreater(const ScoredHistoryMatch& m1,
+ const ScoredHistoryMatch& m2);
+
+ // An interim score taking into consideration location and completeness
+ // of the match.
+ int raw_score;
+ TermMatches url_matches; // Term matches within the URL.
+ TermMatches title_matches; // Term matches within the page title.
+ bool can_inline; // True if this is a candidate for in-line autocompletion.
+};
+typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches;
+
+// Convenience Types -----------------------------------------------------------
+
+typedef std::vector<string16> String16Vector;
+typedef std::set<string16> String16Set;
+typedef std::set<char16> Char16Set;
+typedef std::vector<char16> Char16Vector;
+
+// Utility Functions Relates to Convenience Types ------------------------------
+
+// Breaks a string down into individual words.
+String16Set String16SetFromString16(const string16& uni_string);
+
+// Breaks the |uni_string| string down into individual words and return
+// a vector with the individual words in their original order. If
+// |break_on_space| is false then the resulting list will contain only words
+// containing alpha-numeric characters. If |break_on_space| is true then the
+// resulting list will contain strings broken at whitespace. (|break_on_space|
+// indicates that the BreakIterator::BREAK_SPACE (equivalent to BREAK_LINE)
+// approach is to be used. For a complete description of this algorithm
+// refer to the comments in base/i18n/break_iterator.h.)
+//
+// Example:
+// Given: |uni_string|: "http://www.google.com/ harry the rabbit."
+// With |break_on_space| false the returned list will contain:
+// "http", "www", "google", "com", "harry", "the", "rabbit"
+// With |break_on_space| true the returned list will contain:
+// "http://", "www.google.com/", "harry", "the", "rabbit."
+String16Vector String16VectorFromString16(const string16& uni_string,
+ bool break_on_space);
+
+// Breaks the |uni_word| string down into its individual characters.
+// Note that this is temporarily intended to work on a single word, but
+// _will_ work on a string of words, perhaps with unexpected results.
+// TODO(mrossetti): Lots of optimizations possible here for not restarting
+// a search if the user is just typing along. Also, change this to uniString
+// and properly handle substring matches, scoring and sorting the results
+// by score. Also, provide the metrics for where the matches occur so that
+// the UI can highlight the matched sections.
+Char16Set Char16SetFromString16(const string16& uni_word);
+
+// Support for InMemoryURLIndex Private Data -----------------------------------
+
+// An index into a list of all of the words we have indexed.
+typedef size_t WordID;
+
+// A map allowing a WordID to be determined given a word.
+typedef std::map<string16, WordID> WordMap;
+
+// A map from character to the word_ids of words containing that character.
+typedef std::set<WordID> WordIDSet; // An index into the WordList.
+typedef std::map<char16, WordIDSet> CharWordIDMap;
+
+// A map from word (by word_id) to history items containing that word.
+typedef history::URLID HistoryID;
+typedef std::set<HistoryID> HistoryIDSet;
+typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap;
+typedef std::map<HistoryID, WordIDSet> HistoryIDWordMap;
+
+// A map from history_id to the history's URL and title.
+typedef std::map<HistoryID, URLRow> HistoryInfoMap;
+
+// TODO(rohitrao): Probably replace this with QueryResults.
+typedef std::vector<history::URLRow> URLRowVector;
+
+// A structure describing the index's internal data. This structure is used
+// by the InMemoryURLIndex, InMemoryURLIndexBackend, and
+// InMemoryURLCacheDatabase classes.
+class URLIndexPrivateData {
+ public:
+ URLIndexPrivateData();
+ ~URLIndexPrivateData();
+
+ // Clear all of our data.
+ void Clear();
+
+ // Adds |word_id| to |history_id|'s entry in the history/word map,
+ // creating a new entry if one does not already exist.
+ void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id);
+
+ // Given a set of Char16s, finds words containing those characters.
+ WordIDSet WordIDSetForTermChars(const Char16Set& term_chars);
+
+ private:
+ friend class InMemoryURLIndex;
+ friend class InMemoryURLIndexTest;
+ FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore);
+ FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch);
+ FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization);
+
+ // A list of all of indexed words. The index of a word in this list is the
+ // ID of the word in the word_map_. It reduces the memory overhead by
+ // replacing a potentially long and repeated string with a simple index.
+ String16Vector word_list_;
+
+ // A list of available words slots in |word_list_|. An available word slot
+ // is the index of a unused word in word_list_ vector, also referred to as
+ // a WordID. As URL visits are added or modified new words may be added to
+ // the index, in which case any available words are used, if any, and then
+ // words are added to the end of the word_list_. When URL visits are
+ // modified or deleted old words may be removed from the index, in which
+ // case the slots for those words are added to available_words_ for resuse
+ // by future URL updates.
+ WordIDSet available_words_;
+
+ // A one-to-one mapping from the a word string to its slot number (i.e.
+ // WordID) in the |word_list_|.
+ WordMap word_map_;
+
+ // A one-to-many mapping from a single character to all WordIDs of words
+ // containing that character.
+ CharWordIDMap char_word_map_;
+
+ // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as
+ // used in the history database) of history items in which the word occurs.
+ WordIDHistoryMap word_id_history_map_;
+
+ // A one-to-many mapping from a HistoryID to all WordIDs of words that occur
+ // in the URL and/or page title of the history item referenced by that
+ // HistoryID.
+ HistoryIDWordMap history_id_word_map_;
+
+ // A one-to-one mapping from HistoryID to the history item data governing
+ // index inclusion and relevance scoring.
+ HistoryInfoMap history_info_map_;
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_
Property changes on: chrome/browser/history/in_memory_url_index_types.h
___________________________________________________________________
Added: svn:eol-style
+ LF
« no previous file with comments | « chrome/browser/history/in_memory_url_index.cc ('k') | chrome/browser/history/in_memory_url_index_types.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698