Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(246)

Side by Side Diff: chrome/browser/history/in_memory_url_index.h

Issue 3138006: Next step integrating the HistoryQuickProvider: Implement index initializatio... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: '' Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
7 #pragma once 7 #pragma once
8 8
9 #include <map>
10 #include <set>
11 #include <vector>
12
13 #include "app/sql/connection.h"
14 #include "base/basictypes.h"
15 #include "base/linked_ptr.h"
16 #include "base/scoped_ptr.h"
17 #include "base/string16.h"
18 #include "chrome/browser/history/history_types.h"
19 #include "testing/gtest/include/gtest/gtest_prod.h"
20
21 namespace base {
22 class Time;
23 }
24
9 namespace history { 25 namespace history {
10 26
11 class URLDatabase; 27 class URLDatabase;
12 28
13 // The URL history source. 29 // The URL history source.
14 // Holds portions of the URL database in memory in an indexed form. Used to 30 // Holds portions of the URL database in memory in an indexed form. Used to
15 // quickly look up matching URLs for a given query string. Used by 31 // quickly look up matching URLs for a given query string. Used by
16 // the HistoryURLProvider for inline autocomplete and to provide URL 32 // the HistoryURLProvider for inline autocomplete and to provide URL
17 // matches to the omnibox. 33 // matches to the omnibox.
34 //
35 // Note about multi-byte codepoints and the data structures in the
36 // InMemoryURLIndex class: One will quickly notice that no effort is made to
37 // insure that multi-byte character boundaries are detected when indexing the
38 // words and characters in the URL history database except when converting
39 // URL strings to lowercase. Multi-byte-edness makes no difference when
40 // indexing or when searching the index as the final filtering of results
41 // is dependent on the comparison of a string of bytes, not individual
42 // characters. While the lookup of those bytes during a search in the
43 // |char_word_map_| could serve up words in which the individual char16
44 // occurs as a portion of a composite character the next filtering step
45 // will eliminate such words except in the case where a single character
46 // is being searched on and which character occurs as the second char16 of a
47 // multi-char16 instance.
18 class InMemoryURLIndex { 48 class InMemoryURLIndex {
19 public: 49 public:
20 InMemoryURLIndex() {} 50 InMemoryURLIndex();
21 ~InMemoryURLIndex() {} 51 ~InMemoryURLIndex();
52
53 // Convenience types
54 typedef std::vector<string16> String16Vector;
22 55
23 // Open and index the URL history database. 56 // Open and index the URL history database.
24 bool Init(URLDatabase* history_db); 57 bool Init(URLDatabase* history_db, const string16& languages);
58
59 // Reset the history index.
60 void Reset();
61
62 // Given a vector containing one or more words as string16s, scan the
63 // history index and return a vector with all scored, matching history items.
64 // Each term must occur somewhere in the history item for the item to
65 // qualify; however, the terms do not necessarily have to be adjacent.
66 HistoryMatches HistoryItemsForTerms(const String16Vector& terms);
67
68 // Returns the date threshold for considering an history item as significant.
69 static base::Time RecentThreshold();
70
71 private:
72 friend class InMemoryURLIndexTest;
73 FRIEND_TEST(InMemoryURLIndexTest, Initialization);
74
75 // Convenience types
76 typedef std::set<string16> String16Set;
77 typedef std::set<char16> Char16Set;
78
79 // An index into list of all of the words we have indexed.
80 typedef int16 WordID;
81
82 // A map allowing a WordID to be determined given a word.
83 typedef std::map<string16, WordID> WordMap;
84
85 // A map from character to word_ids.
86 typedef std::set<WordID> WordIDSet; // An index into the WordList.
87 typedef std::map<char16, WordIDSet> CharWordIDMap;
88
89 // A map from word_id to history item.
90 // TODO(mrossetti): URLID is 64 bit: a memory bloat and performance hit.
91 // Consider using a smaller type.
92 typedef URLID HistoryID;
93 typedef std::set<HistoryID> HistoryIDSet;
94 typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap;
95
96 // Support caching of term character intersections so that we can optimize
97 // searches which build upon a previous search.
98 struct TermCharWordSet {
99 TermCharWordSet(Char16Set char_set, WordIDSet word_id_set, bool used)
100 : char_set_(char_set),
101 word_id_set_(word_id_set),
102 used_(used) {}
103
104 Char16Set char_set_;
105 WordIDSet word_id_set_;
106 bool used_; // true if this set has been used for the current term search.
107 };
108 typedef std::vector<TermCharWordSet> TermCharWordSetVector;
109
110 // TODO(rohitrao): Probably replace this with QueryResults.
111 typedef std::vector<URLRow> URLRowVector;
112
113 // A map from history_id to the history's URL and title.
114 typedef std::map<HistoryID, URLRow> HistoryInfoMap;
115
116 // Break a string down into individual words.
117 String16Set WordsFromString16(const string16& uni_string);
118
119 // URL History indexing support functions.
120
121 // Index one URL history item.
122 bool IndexRow(URLRow row);
123
124 // Break a string down into its individual characters.
125 // Note that this is temporarily intended to work on a single word, but
126 // _will_ work on a string of words, perhaps with unexpected results.
127 // TODO(mrossetti): Lots of optimizations possible here for not restarting
128 // a search if the user is just typing along. Also, change this to uniString
129 // and properly handle substring matches, scoring and sorting the results
130 // by score. Also, provide the metrics for where the matches occur so that
131 // the UI can highlight the matched sections.
132 Char16Set CharactersFromString16(const string16& uni_word);
133
134 // Given a single word, add a reference to the containing history item
135 // to the index.
136 void AddWordToIndex(const string16& uni_word, HistoryID history_id);
137
138 // Update an existing entry in the word/history index by adding the
139 // |history_id| to set for |word_id| in the word_id_history_map_.
140 void UpdateWordHistory(WordID word_id, HistoryID history_id);
141
142 // Create a new entry in the word/history map for |word_id| and add
143 // |history_id| as the initial element of the word's set.
144 void AddWordHistory(const string16& uni_word, HistoryID history_id);
145
146 // A list of all of indexed words. The index of a word in this list is the
147 // ID of the word in the word_map_. It reduces the memory overhead by
148 // replacing a potentially long and repeated string with a simple index.
149 // NOTE: A word will _never_ be removed from this vector thus insuring
150 // the immutability of the word_id throughout the session, reducing
151 // maintenance complexity.
152 String16Vector word_list_;
153
154 uint64 history_item_count_;
155 WordMap word_map_;
156 CharWordIDMap char_word_map_;
157 WordIDHistoryMap word_id_history_map_;
158 TermCharWordSetVector term_char_word_set_cache_;
159 HistoryInfoMap history_info_map_;
160 string16 languages_;
161
162 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex);
25 }; 163 };
26 164
27 } // namespace history 165 } // namespace history
28 166
29 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ 167 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
OLDNEW
« no previous file with comments | « chrome/browser/history/in_memory_history_backend.cc ('k') | chrome/browser/history/in_memory_url_index.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698