Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(593)

Side by Side Diff: chrome/browser/history/in_memory_url_index.cc

Issue 3197008: Revert 56962 - Next step integrating the HistoryQuickProvider: Implement inde... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/in_memory_url_index.h" 5 #include "chrome/browser/history/in_memory_url_index.h"
6 6
7 #include <algorithm>
8 #include <limits>
9
10 #include "app/l10n_util.h"
11 #include "base/i18n/word_iterator.h"
12 #include "base/string_util.h"
13 #include "base/time.h"
14 #include "base/utf_string_conversions.h"
15 #include "chrome/browser/history/url_database.h" 7 #include "chrome/browser/history/url_database.h"
16 #include "net/base/escape.h"
17 #include "net/base/net_util.h"
18
19 using base::Time;
20 using base::TimeDelta;
21 8
22 namespace history { 9 namespace history {
23 10
24 InMemoryURLIndex::InMemoryURLIndex() : history_item_count_(0) {}
25
26 InMemoryURLIndex::~InMemoryURLIndex() {}
27
28 // Indexing 11 // Indexing
29 12
30 bool InMemoryURLIndex::Init(history::URLDatabase* history_db, 13 bool InMemoryURLIndex::Init(history::URLDatabase* history_db) {
31 const string16& languages) { 14 bool success = true;
32 // TODO(mrossetti): Register for profile/language change notifications. 15 // TODO(mrossetti): Implement.
33 languages_ = languages; 16 return success;
34 // Reset our indexes.
35 char_word_map_.clear();
36 word_id_history_map_.clear();
37 if (!history_db)
38 return false;
39 URLDatabase::URLEnumerator history_enum;
40 if (history_db->InitURLEnumeratorForEverything(&history_enum)) {
41 URLRow row;
42 Time recent_threshold = InMemoryURLIndex::RecentThreshold();
43 while (history_enum.GetNextURL(&row)) {
44 // Do some filtering so that we only get history items which could
45 // possibly pass the HistoryURLProvider::CullPoorMatches filter later.
46 if ((row.typed_count() > kLowQualityMatchTypedLimit) ||
47 (row.visit_count() > kLowQualityMatchVisitLimit) ||
48 (row.last_visit() >= recent_threshold)) {
49 if (!IndexRow(row))
50 return false;
51 }
52 }
53 }
54 return true;
55 }
56
57 bool InMemoryURLIndex::IndexRow(URLRow row) {
58 const GURL& gurl(row.url());
59 string16 url(WideToUTF16(net::FormatUrl(gurl, UTF16ToWide(languages_),
60 net::kFormatUrlOmitUsernamePassword,
61 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, NULL, NULL,
62 NULL)));
63
64 // TODO(mrossetti): Find or implement a ConvertPercentEncoding and use it
65 // on the url.
66
67 // TODO(mrossetti): Detect row_id > std::numeric_limits<HistoryID>::max().
68 HistoryID history_id = static_cast<HistoryID>(row.id());
69
70 // Add the row for quick lookup in the history info store.
71 url = l10n_util::ToLower(url);
72 URLRow new_row(GURL(url), row.id());
73 new_row.set_visit_count(row.visit_count());
74 new_row.set_typed_count(row.typed_count());
75 new_row.set_last_visit(row.last_visit());
76 new_row.set_title(row.title());
77 history_info_map_.insert(std::make_pair(history_id, new_row));
78
79 // Split into individual, unique words.
80 String16Set words = WordsFromString16(url);
81
82 // For each word, add a new entry into the word index referring to the
83 // associated history item.
84 for (String16Set::iterator iter = words.begin();
85 iter != words.end(); ++iter) {
86 String16Set::value_type uni_word = *iter;
87 AddWordToIndex(uni_word, history_id);
88 }
89 ++history_item_count_;
90 return true;
91 }
92
93 // Utility Functions
94
95 InMemoryURLIndex::String16Set InMemoryURLIndex::WordsFromString16(
96 const string16& uni_string) {
97 String16Set words;
98
99 // TODO(mrossetti): Replace all | and _'s with a space, all % quoted
100 // characters with real characters, and break into words, using
101 // appropriate string16 functions.
102 WordIterator iter(&uni_string, WordIterator::BREAK_WORD);
103 if (iter.Init()) {
104 while (iter.Advance()) {
105 if (iter.IsWord()) {
106 words.insert(iter.GetWord());
107 }
108 }
109 }
110 return words;
111 }
112
113 InMemoryURLIndex::Char16Set InMemoryURLIndex::CharactersFromString16(
114 const string16& uni_word) {
115 Char16Set characters;
116 for (string16::const_iterator iter = uni_word.begin();
117 iter != uni_word.end(); ++iter)
118 characters.insert(*iter);
119 return characters;
120 }
121
122 void InMemoryURLIndex::AddWordToIndex(const string16& uni_word,
123 HistoryID history_id) {
124 WordMap::iterator word_pos = word_map_.find(uni_word);
125 if (word_pos != word_map_.end())
126 UpdateWordHistory(word_pos->second, history_id);
127 else
128 AddWordHistory(uni_word, history_id);
129 }
130
131 void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) {
132 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);
133 DCHECK(history_pos != word_id_history_map_.end());
134 HistoryIDSet& history_id_set(history_pos->second);
135 history_id_set.insert(history_id);
136 }
137
138 // Add a new word to the word list and the word map, and then create a
139 // new entry in the word/history map.
140 void InMemoryURLIndex::AddWordHistory(const string16& uni_word,
141 HistoryID history_id) {
142 word_list_.push_back(uni_word);
143 WordID word_id = word_list_.size() - 1;
144 DCHECK(word_map_.insert(std::make_pair(uni_word, word_id)).second);
145 HistoryIDSet history_id_set;
146 history_id_set.insert(history_id);
147 DCHECK(word_id_history_map_.insert(
148 std::make_pair(word_id, history_id_set)).second);
149 // For each character in the newly added word (i.e. a word that is not
150 // already in the word index), add the word to the character index.
151 Char16Set characters = CharactersFromString16(uni_word);
152 for (Char16Set::iterator uni_char_iter = characters.begin();
153 uni_char_iter != characters.end(); ++uni_char_iter) {
154 Char16Set::value_type uni_string = *uni_char_iter;
155 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_string);
156 if (char_iter != char_word_map_.end()) {
157 // Update existing entry in the char/word index.
158 WordIDSet& word_id_set(char_iter->second);
159 word_id_set.insert(word_id);
160 } else {
161 // Create a new entry in the char/word index.
162 WordIDSet word_id_set;
163 word_id_set.insert(word_id);
164 DCHECK(char_word_map_.insert(std::make_pair(uni_string,
165 word_id_set)).second);
166 }
167 }
168 }
169
170 // static
171 Time InMemoryURLIndex::RecentThreshold() {
172 return Time::Now() - TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays);
173 } 17 }
174 18
175 } // namespace history 19 } // namespace history
OLD | NEW
« no previous file with comments | « chrome/browser/history/in_memory_url_index.h ('k') | chrome/browser/history/in_memory_url_index_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698