chrome/browser/history/in_memory_url_index.cc - Issue 8359019: Create Private Data for InMemoryURLIndex (in Preparation for SQLite Cache)

Side by Side Diff: chrome/browser/history/in_memory_url_index.cc

Issue 8359019: Create Private Data for InMemoryURLIndex (in Preparation for SQLite Cache) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: '' Created 9 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/history/in_memory_url_index.h"	5 #include "chrome/browser/history/in_memory_url_index.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <functional>	8 #include <functional>

9 #include <iterator>	9 #include <iterator>

10 #include <limits>	10 #include <limits>

11 #include <numeric>	11 #include <numeric>

12	12

13 #include "base/file_util.h"	13 #include "base/file_util.h"

14 #include "base/i18n/break_iterator.h"

15 #include "base/i18n/case_conversion.h"	14 #include "base/i18n/case_conversion.h"

16 #include "base/metrics/histogram.h"	15 #include "base/metrics/histogram.h"

17 #include "base/string_util.h"	16 #include "base/string_util.h"

18 #include "base/threading/thread_restrictions.h"	17 #include "base/threading/thread_restrictions.h"

19 #include "base/time.h"	18 #include "base/time.h"

20 #include "base/utf_string_conversions.h"	19 #include "base/utf_string_conversions.h"

21 #include "chrome/browser/autocomplete/autocomplete.h"	20 #include "chrome/browser/autocomplete/autocomplete.h"

22 #include "chrome/browser/autocomplete/history_provider_util.h"	21 #include "chrome/browser/autocomplete/history_provider_util.h"

23 #include "chrome/browser/history/url_database.h"	22 #include "chrome/browser/history/url_database.h"

24 #include "chrome/browser/profiles/profile.h"	23 #include "chrome/browser/profiles/profile.h"

(...skipping 27 matching lines...) Expand all Loading...
52 HistoryInfoMapEntry;	51 HistoryInfoMapEntry;

53	52

54 const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1;	53 const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1;

55	54

56 // Score ranges used to get a 'base' score for each of the scoring factors	55 // Score ranges used to get a 'base' score for each of the scoring factors

57 // (such as recency of last visit, times visited, times the URL was typed,	56 // (such as recency of last visit, times visited, times the URL was typed,

58 // and the quality of the string match). There is a matching value range for	57 // and the quality of the string match). There is a matching value range for

59 // each of these scores for each factor.	58 // each of these scores for each factor.

60 const int kScoreRank[] = { 1425, 1200, 900, 400 };	59 const int kScoreRank[] = { 1425, 1200, 900, 400 };

61	60

62 ScoredHistoryMatch::ScoredHistoryMatch()

63 : raw_score(0),

64 can_inline(false) {}

65

66 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info)

67 : HistoryMatch(url_info, 0, false, false),

68 raw_score(0),

69 can_inline(false) {}

70

71 ScoredHistoryMatch::~ScoredHistoryMatch() {}

72

73 // Comparison function for sorting ScoredMatches by their scores.

74 bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1,

75 const ScoredHistoryMatch& m2) {

76 return m1.raw_score >= m2.raw_score;

77 }

78

79 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem(	61 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem(

80 const WordIDSet& word_id_set,	62 const WordIDSet& word_id_set,

81 const HistoryIDSet& history_id_set)	63 const HistoryIDSet& history_id_set)

82 : word_id_set_(word_id_set),	64 : word_id_set_(word_id_set),

83 history_id_set_(history_id_set),	65 history_id_set_(history_id_set),

84 used_(true) {}	66 used_(true) {}

85	67

86 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem()	68 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem()

87 : used_(true) {}	69 : used_(true) {}

88	70

89 InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {}	71 InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {}

90	72

91 // Comparison function for sorting TermMatches by their offsets.

92 bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {

93 return m1.offset < m2.offset;

94 }

95

96 // Comparison function for sorting search terms by descending length.	73 // Comparison function for sorting search terms by descending length.

97 bool LengthGreater(const string16& string_a, const string16& string_b) {	74 bool LengthGreater(const string16& string_a, const string16& string_b) {

98 return string_a.length() > string_b.length();	75 return string_a.length() > string_b.length();

99 }	76 }

100	77

101 // std::accumulate helper function to add up TermMatches' lengths.	78 // std::accumulate helper function to add up TermMatches' lengths.

102 int AccumulateMatchLength(int total, const TermMatch& match) {	79 int AccumulateMatchLength(int total, const TermMatch& match) {

103 return total + match.length;	80 return total + match.length;

104 }	81 }

105	82

(...skipping 24 matching lines...) Expand all Loading...
130 if (i > 0) {	107 if (i > 0) {

131 score += (value - value_ranks[i]) *	108 score += (value - value_ranks[i]) *

132 (kScoreRank[i - 1] - kScoreRank[i]) /	109 (kScoreRank[i - 1] - kScoreRank[i]) /

133 (value_ranks[i - 1] - value_ranks[i]);	110 (value_ranks[i - 1] - value_ranks[i]);

134 }	111 }

135 return score;	112 return score;

136 }	113 }

137	114

138 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir)	115 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir)

139 : history_dir_(history_dir),	116 : history_dir_(history_dir),

140 history_item_count_(0),	117 private_data_(new URLIndexPrivateData),

141 cached_at_shutdown_(false) {	118 cached_at_shutdown_(false) {

142 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);	119 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);

143 }	120 }

144	121

145 // Called only by unit tests.	122 // Called only by unit tests.

146 InMemoryURLIndex::InMemoryURLIndex()	123 InMemoryURLIndex::InMemoryURLIndex()

147 : history_item_count_(0),	124 : private_data_(new URLIndexPrivateData),

148 cached_at_shutdown_(false) {	125 cached_at_shutdown_(false) {

149 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);	126 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);

150 }	127 }

151	128

152 InMemoryURLIndex::~InMemoryURLIndex() {	129 InMemoryURLIndex::~InMemoryURLIndex() {

153 // If there was a history directory (which there won't be for some unit tests)	130 // If there was a history directory (which there won't be for some unit tests)

154 // then insure that the cache has already been saved.	131 // then insure that the cache has already been saved.

155 DCHECK(history_dir_.empty() \|\| cached_at_shutdown_);	132 DCHECK(history_dir_.empty() \|\| cached_at_shutdown_);

156 }	133 }

157	134

158 // static	135 // static

159 void InMemoryURLIndex::InitializeSchemeWhitelist(	136 void InMemoryURLIndex::InitializeSchemeWhitelist(

160 std::set<std::string>* whitelist) {	137 std::set<std::string>* whitelist) {

161 DCHECK(whitelist);	138 DCHECK(whitelist);

162 whitelist->insert(std::string(chrome::kAboutScheme));	139 whitelist->insert(std::string(chrome::kAboutScheme));

163 whitelist->insert(std::string(chrome::kChromeUIScheme));	140 whitelist->insert(std::string(chrome::kChromeUIScheme));

164 whitelist->insert(std::string(chrome::kFileScheme));	141 whitelist->insert(std::string(chrome::kFileScheme));

165 whitelist->insert(std::string(chrome::kFtpScheme));	142 whitelist->insert(std::string(chrome::kFtpScheme));

166 whitelist->insert(std::string(chrome::kHttpScheme));	143 whitelist->insert(std::string(chrome::kHttpScheme));

167 whitelist->insert(std::string(chrome::kHttpsScheme));	144 whitelist->insert(std::string(chrome::kHttpsScheme));

168 whitelist->insert(std::string(chrome::kMailToScheme));	145 whitelist->insert(std::string(chrome::kMailToScheme));

169 }	146 }

170	147

171 // Indexing	148 // Indexing

172	149

173 bool InMemoryURLIndex::Init(history::URLDatabase* history_db,	150 bool InMemoryURLIndex::Init(URLDatabase* history_db,

174 const std::string& languages) {	151 const std::string& languages) {

175 // TODO(mrossetti): Register for profile/language change notifications.	152 // TODO(mrossetti): Register for profile/language change notifications.

176 languages_ = languages;	153 languages_ = languages;

177 return ReloadFromHistory(history_db, false);	154 return ReloadFromHistory(history_db, false);

178 }	155 }

179	156

180 void InMemoryURLIndex::ShutDown() {	157 void InMemoryURLIndex::ShutDown() {

181 // Write our cache.	158 // Write our cache.

182 SaveToCacheFile();	159 SaveToCacheFile();

183 cached_at_shutdown_ = true;	160 cached_at_shutdown_ = true;

184 }	161 }

185	162

186 bool InMemoryURLIndex::IndexRow(const URLRow& row) {	163 void InMemoryURLIndex::IndexRow(const URLRow& row) {

187 const GURL& gurl(row.url());	164 const GURL& gurl(row.url());

188	165

189 // Index only URLs with a whitelisted scheme.	166 // Index only URLs with a whitelisted scheme.

190 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl))	167 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl))

191 return true;	168 return;

192	169

	170 URLID row_id = row.id();

	171 // Strip out username and password before saving and indexing.

193 string16 url(net::FormatUrl(gurl, languages_,	172 string16 url(net::FormatUrl(gurl, languages_,

194 net::kFormatUrlOmitUsernamePassword,	173 net::kFormatUrlOmitUsernamePassword,

195 UnescapeRule::SPACES \| UnescapeRule::URL_SPECIAL_CHARS,	174 UnescapeRule::SPACES \| UnescapeRule::URL_SPECIAL_CHARS,

196 NULL, NULL, NULL));	175 NULL, NULL, NULL));

197	176

198 HistoryID history_id = static_cast<HistoryID>(row.id());	177 HistoryID history_id = static_cast<HistoryID>(row_id);

199 DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max());	178 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());

200	179

201 // Add the row for quick lookup in the history info store.	180 // Add the row for quick lookup in the history info store.

202 URLRow new_row(GURL(url), row.id());	181 URLRow new_row(GURL(url), row_id);

203 new_row.set_visit_count(row.visit_count());	182 new_row.set_visit_count(row.visit_count());

204 new_row.set_typed_count(row.typed_count());	183 new_row.set_typed_count(row.typed_count());

205 new_row.set_last_visit(row.last_visit());	184 new_row.set_last_visit(row.last_visit());

206 new_row.set_title(row.title());	185 new_row.set_title(row.title());

207 history_info_map_[history_id] = new_row;	186 private_data_->history_info_map_[history_id] = new_row;

208	187

	188 // Index the words contained in the URL and title of the row.

	189 AddRowWordsToIndex(new_row);

	190 return;

	191 }

	192

	193 void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) {

	194 HistoryID history_id = static_cast<HistoryID>(row.id());

209 // Split URL into individual, unique words then add in the title words.	195 // Split URL into individual, unique words then add in the title words.

	196 const GURL& gurl(row.url());

	197 string16 url(net::FormatUrl(gurl, languages_,

	198 net::kFormatUrlOmitUsernamePassword,

	199 UnescapeRule::SPACES \| UnescapeRule::URL_SPECIAL_CHARS,

	200 NULL, NULL, NULL));

210 url = base::i18n::ToLower(url);	201 url = base::i18n::ToLower(url);

211 String16Set url_words = WordSetFromString16(url);	202 String16Set url_words = String16SetFromString16(url);

212 String16Set title_words = WordSetFromString16(row.title());	203 String16Set title_words = String16SetFromString16(row.title());

213 String16Set words;	204 String16Set words;

214 std::set_union(url_words.begin(), url_words.end(),	205 std::set_union(url_words.begin(), url_words.end(),

215 title_words.begin(), title_words.end(),	206 title_words.begin(), title_words.end(),

216 std::insert_iterator<String16Set>(words, words.begin()));	207 std::insert_iterator<String16Set>(words, words.begin()));

217 for (String16Set::iterator word_iter = words.begin();	208 for (String16Set::iterator word_iter = words.begin();

218 word_iter != words.end(); ++word_iter)	209 word_iter != words.end(); ++word_iter)

219 AddWordToIndex(*word_iter, history_id);	210 AddWordToIndex(*word_iter, history_id);

220	211

221 ++history_item_count_;	212 search_term_cache_.clear(); // Invalidate the term cache.

222 return true;	213 }

	214

	215 void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) {

	216 RemoveRowWordsFromIndex(row);

	217 HistoryID history_id = static_cast<HistoryID>(row.id());

	218 private_data_->history_info_map_.erase(history_id);

	219 }

	220

	221 void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) {

	222 // Remove the entries in history_id_word_map_ and word_id_history_map_ for

	223 // this row.

	224 URLIndexPrivateData& private_data(*(private_data_.get()));

	225 HistoryID history_id = static_cast<HistoryID>(row.id());

	226 WordIDSet word_id_set = private_data.history_id_word_map_[history_id];

	227 private_data.history_id_word_map_.erase(history_id);

	228

	229 // Reconcile any changes to word usage.

	230 for (WordIDSet::iterator word_id_iter = word_id_set.begin();

	231 word_id_iter != word_id_set.end(); ++word_id_iter) {

	232 WordID word_id = *word_id_iter;

	233 private_data.word_id_history_map_[word_id].erase(history_id);

	234 if (!private_data.word_id_history_map_[word_id].empty())

	235 continue; // The word is still in use.

	236

	237 // The word is no longer in use. Reconcile any changes to character usage.

	238 string16 word = private_data.word_list_[word_id];

	239 Char16Set characters = Char16SetFromString16(word);

	240 for (Char16Set::iterator uni_char_iter = characters.begin();

	241 uni_char_iter != characters.end(); ++uni_char_iter) {

	242 char16 uni_char = *uni_char_iter;

	243 private_data.char_word_map_[uni_char].erase(word_id);

	244 if (private_data.char_word_map_[uni_char].empty())

	245 private_data.char_word_map_.erase(uni_char); // No longer in use.

	246 }

	247

	248 // Complete the removal of references to the word.

	249 private_data.word_id_history_map_.erase(word_id);

	250 private_data.word_map_.erase(word);

	251 private_data.word_list_[word_id] = string16();

	252 private_data.available_words_.insert(word_id);

	253 }

223 }	254 }

224	255

225 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,	256 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,

226 bool clear_cache) {	257 bool clear_cache) {

227 ClearPrivateData();	258 ClearPrivateData();

228	259

229 if (!history_db)	260 if (!history_db)

230 return false;	261 return false;

231	262

232 if (clear_cache \|\| !RestoreFromCacheFile()) {	263 if (clear_cache \|\| !RestoreFromCacheFile()) {

233 base::TimeTicks beginning_time = base::TimeTicks::Now();	264 base::TimeTicks beginning_time = base::TimeTicks::Now();

234 // The index has to be built from scratch.	265 // The index has to be built from scratch.

235 URLDatabase::URLEnumerator history_enum;	266 URLDatabase::URLEnumerator history_enum;

236 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))	267 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))

237 return false;	268 return false;

238 URLRow row;	269 URLRow row;

239 while (history_enum.GetNextURL(&row)) {	270 while (history_enum.GetNextURL(&row))

240 if (!IndexRow(row))	271 IndexRow(row);

241 return false;

242 }

243 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",	272 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",

244 base::TimeTicks::Now() - beginning_time);	273 base::TimeTicks::Now() - beginning_time);

245 SaveToCacheFile();	274 SaveToCacheFile();

246 }	275 }

247 return true;	276 return true;

248 }	277 }

249	278

250 void InMemoryURLIndex::ClearPrivateData() {	279 void InMemoryURLIndex::ClearPrivateData() {

251 history_item_count_ = 0;	280 private_data_->Clear();

252 word_list_.clear();

253 word_map_.clear();

254 char_word_map_.clear();

255 word_id_history_map_.clear();

256 history_info_map_.clear();

257 search_term_cache_.clear();	281 search_term_cache_.clear();

258 }	282 }

259	283

260 bool InMemoryURLIndex::RestoreFromCacheFile() {	284 bool InMemoryURLIndex::RestoreFromCacheFile() {

261 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.	285 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.

262 // That is: ensure that the database has not been modified since the cache	286 // That is: ensure that the database has not been modified since the cache

263 // was last saved. DB file modification date is inadequate. There are no	287 // was last saved. DB file modification date is inadequate. There are no

264 // SQLite table checksums automatically stored.	288 // SQLite table checksums automatically stored.

265 // FIXME(mrossetti): Move File IO to another thread.	289 // FIXME(mrossetti): Move File IO to another thread.

266 base::ThreadRestrictions::ScopedAllowIO allow_io;	290 base::ThreadRestrictions::ScopedAllowIO allow_io;

(...skipping 15 matching lines...) Expand all Loading...
282 return false;	306 return false;

283 }	307 }

284	308

285 if (!RestorePrivateData(index_cache)) {	309 if (!RestorePrivateData(index_cache)) {

286 ClearPrivateData(); // Back to square one -- must build from scratch.	310 ClearPrivateData(); // Back to square one -- must build from scratch.

287 return false;	311 return false;

288 }	312 }

289	313

290 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",	314 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",

291 base::TimeTicks::Now() - beginning_time);	315 base::TimeTicks::Now() - beginning_time);

292 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_);	316 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

	317 private_data_->history_id_word_map_.size());

293 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());	318 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());

294 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size());	319 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

295 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size());	320 private_data_->word_map_.size());

	321 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

	322 private_data_->char_word_map_.size());

296 return true;	323 return true;

297 }	324 }

298	325

299 bool InMemoryURLIndex::SaveToCacheFile() {	326 bool InMemoryURLIndex::SaveToCacheFile() {

300 // TODO(mrossetti): Move File IO to another thread.	327 // TODO(mrossetti): Move File IO to another thread.

301 base::ThreadRestrictions::ScopedAllowIO allow_io;	328 base::ThreadRestrictions::ScopedAllowIO allow_io;

302 FilePath file_path;	329 FilePath file_path;

303 if (!GetCacheFilePath(&file_path))	330 if (!GetCacheFilePath(&file_path))

304 return false;	331 return false;

305	332

(...skipping 14 matching lines...) Expand all Loading...
320 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",	347 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",

321 base::TimeTicks::Now() - beginning_time);	348 base::TimeTicks::Now() - beginning_time);

322 return true;	349 return true;

323 }	350 }

324	351

325 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) {	352 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) {

326 // The row may or may not already be in our index. If it is not already	353 // The row may or may not already be in our index. If it is not already

327 // indexed and it qualifies then it gets indexed. If it is already	354 // indexed and it qualifies then it gets indexed. If it is already

328 // indexed and still qualifies then it gets updated, otherwise it	355 // indexed and still qualifies then it gets updated, otherwise it

329 // is deleted from the index.	356 // is deleted from the index.

330 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);	357 HistoryInfoMap::iterator row_pos =

331 if (row_pos == history_info_map_.end()) {	358 private_data_->history_info_map_.find(row_id);

	359 if (row_pos == private_data_->history_info_map_.end()) {

332 // This new row should be indexed if it qualifies.	360 // This new row should be indexed if it qualifies.

333 if (RowQualifiesAsSignificant(row, base::Time()))	361 URLRow new_row(row);

334 IndexRow(row);	362 new_row.set_id(row_id);

	363 if (RowQualifiesAsSignificant(new_row, base::Time()))

	364 IndexRow(new_row);

335 } else if (RowQualifiesAsSignificant(row, base::Time())) {	365 } else if (RowQualifiesAsSignificant(row, base::Time())) {

336 // This indexed row still qualifies and will be re-indexed.	366 // This indexed row still qualifies and will be re-indexed.

337 // The url won't have changed but the title, visit count, etc.	367 // The url won't have changed but the title, visit count, etc.

338 // might have changed.	368 // might have changed.

339 URLRow& old_row = row_pos->second;	369 URLRow& updated_row = row_pos->second;

340 old_row.set_visit_count(row.visit_count());	370 updated_row.set_visit_count(row.visit_count());

341 old_row.set_typed_count(row.typed_count());	371 updated_row.set_typed_count(row.typed_count());

342 old_row.set_last_visit(row.last_visit());	372 updated_row.set_last_visit(row.last_visit());

343 // TODO(mrossetti): When we start indexing the title the next line	373 // While the URL is guaranteed to remain stable, the title may have changed.

344 // will need attention.	374 // If so, then we need to update the index with the changed words.

345 old_row.set_title(row.title());	375 if (updated_row.title() != row.title()) {

	376 // Clear all words associated with this row and re-index both the

	377 // URL and title.

	378 RemoveRowWordsFromIndex(updated_row);

	379 updated_row.set_title(row.title());

	380 AddRowWordsToIndex(updated_row);

	381 }

346 } else {	382 } else {

347 // This indexed row no longer qualifies and will be de-indexed.	383 // This indexed row no longer qualifies and will be de-indexed by

348 history_info_map_.erase(row_id);	384 // clearing all words associated with this row.

	385 URLRow& removed_row = row_pos->second;

	386 RemoveRowFromIndex(removed_row);

349 }	387 }

350 // This invalidates the cache.	388 // This invalidates the cache.

351 search_term_cache_.clear();	389 search_term_cache_.clear();

352 // TODO(mrossetti): Record this transaction in the cache.

353 }	390 }

354	391

355 void InMemoryURLIndex::DeleteURL(URLID row_id) {	392 void InMemoryURLIndex::DeleteURL(URLID row_id) {

356 // Note that this does not remove any reference to this row from the	393 // Note that this does not remove any reference to this row from the

357 // word_id_history_map_. That map will continue to contain (and return)	394 // word_id_history_map_. That map will continue to contain (and return)

358 // hits against this row until that map is rebuilt, but since the	395 // hits against this row until that map is rebuilt, but since the

359 // history_info_map_ no longer references the row no erroneous results	396 // history_info_map_ no longer references the row no erroneous results

360 // will propagate to the user.	397 // will propagate to the user.

361 history_info_map_.erase(row_id);	398 private_data_->history_info_map_.erase(row_id);

362 // This invalidates the word cache.	399 // This invalidates the word cache.

363 search_term_cache_.clear();	400 search_term_cache_.clear();

364 // TODO(mrossetti): Record this transaction in the cache.

365 }	401 }

366	402

367 // Searching	403 // Searching

368	404

369 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(	405 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(

370 const String16Vector& terms) {	406 const String16Vector& terms) {

371 ScoredHistoryMatches scored_items;	407 ScoredHistoryMatches scored_items;

	408

	409 // Do nothing if we have indexed no words (probably because we've not been

	410 // initialized yet).

	411 if (private_data_->word_list_.empty())

	412 return scored_items;

	413

372 if (!terms.empty()) {	414 if (!terms.empty()) {

373 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep	415 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

374 // approach.	416 // approach.

375 ResetSearchTermCache();	417 ResetSearchTermCache();

376	418

377 // Lowercase the terms.	419 // Lowercase the terms.

378 // TODO(mrossetti): Another opportunity for a transform algorithm.	420 // TODO(mrossetti): Another opportunity for a transform algorithm.

379 String16Vector lower_terms;	421 String16Vector lower_terms;

380 for (String16Vector::const_iterator term_iter = terms.begin();	422 for (String16Vector::const_iterator term_iter = terms.begin();

381 term_iter != terms.end(); ++term_iter)	423 term_iter != terms.end(); ++term_iter)

(...skipping 37 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
419	461

420 return scored_items;	462 return scored_items;

421 }	463 }

422	464

423 void InMemoryURLIndex::ResetSearchTermCache() {	465 void InMemoryURLIndex::ResetSearchTermCache() {

424 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();	466 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();

425 iter != search_term_cache_.end(); ++iter)	467 iter != search_term_cache_.end(); ++iter)

426 iter->second.used_ = false;	468 iter->second.used_ = false;

427 }	469 }

428	470

429 InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(	471 HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(

430 const string16& uni_string) {	472 const string16& uni_string) {

431 // Break the terms down into individual terms (words), get the candidate	473 // Break the terms down into individual terms (words), get the candidate

432 // set for each term, and intersect each to get a final candidate list.	474 // set for each term, and intersect each to get a final candidate list.

433 // Note that a single 'term' from the user's perspective might be	475 // Note that a single 'term' from the user's perspective might be

434 // a string like "http://www.somewebsite.com" which, from our perspective,	476 // a string like "http://www.somewebsite.com" which, from our perspective,

435 // is four words: 'http', 'www', 'somewebsite', and 'com'.	477 // is four words: 'http', 'www', 'somewebsite', and 'com'.

436 HistoryIDSet history_id_set;	478 HistoryIDSet history_id_set;

437 String16Vector terms = WordVectorFromString16(uni_string, true);	479 String16Vector terms = String16VectorFromString16(uni_string, true);

438 // Sort the terms into the longest first as such are likely to narrow down	480 // Sort the terms into the longest first as such are likely to narrow down

439 // the results quicker. Also, single character terms are the most expensive	481 // the results quicker. Also, single character terms are the most expensive

440 // to process so save them for last.	482 // to process so save them for last.

441 std::sort(terms.begin(), terms.end(), LengthGreater);	483 std::sort(terms.begin(), terms.end(), LengthGreater);

442 for (String16Vector::iterator iter = terms.begin(); iter != terms.end();	484 for (String16Vector::iterator iter = terms.begin(); iter != terms.end();

443 ++iter) {	485 ++iter) {

444 string16 uni_word = *iter;	486 string16 uni_word = *iter;

445 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word);	487 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word);

446 if (term_history_set.empty()) {	488 if (term_history_set.empty()) {

447 history_id_set.clear();	489 history_id_set.clear();

448 break;	490 break;

449 }	491 }

450 if (iter == terms.begin()) {	492 if (iter == terms.begin()) {

451 history_id_set.swap(term_history_set);	493 history_id_set.swap(term_history_set);

452 } else {	494 } else {

453 HistoryIDSet new_history_id_set;	495 HistoryIDSet new_history_id_set;

454 std::set_intersection(history_id_set.begin(), history_id_set.end(),	496 std::set_intersection(history_id_set.begin(), history_id_set.end(),

455 term_history_set.begin(), term_history_set.end(),	497 term_history_set.begin(), term_history_set.end(),

456 std::inserter(new_history_id_set,	498 std::inserter(new_history_id_set,

457 new_history_id_set.begin()));	499 new_history_id_set.begin()));

458 history_id_set.swap(new_history_id_set);	500 history_id_set.swap(new_history_id_set);

459 }	501 }

460 }	502 }

461 return history_id_set;	503 return history_id_set;

462 }	504 }

463	505

464 InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(	506 HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(

465 const string16& term) {	507 const string16& term) {

466 if (term.empty())	508 if (term.empty())

467 return HistoryIDSet();	509 return HistoryIDSet();

468	510

469 // TODO(mrossetti): Consider optimizing for very common terms such as	511 // TODO(mrossetti): Consider optimizing for very common terms such as

470 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently	512 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently

471 // occuring words in the user's searches.	513 // occuring words in the user's searches.

472	514

473 size_t term_length = term.length();	515 size_t term_length = term.length();

474 InMemoryURLIndex::WordIDSet word_id_set;	516 WordIDSet word_id_set;

475 if (term_length > 1) {	517 if (term_length > 1) {

476 // See if this term or a prefix thereof is present in the cache.	518 // See if this term or a prefix thereof is present in the cache.

477 SearchTermCacheMap::iterator best_prefix(search_term_cache_.end());	519 SearchTermCacheMap::iterator best_prefix(search_term_cache_.end());

478 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();	520 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();

479 cache_iter != search_term_cache_.end(); ++cache_iter) {	521 cache_iter != search_term_cache_.end(); ++cache_iter) {

480 if (StartsWith(term, cache_iter->first, false) &&	522 if (StartsWith(term, cache_iter->first, false) &&

481 (best_prefix == search_term_cache_.end() \|\|	523 (best_prefix == search_term_cache_.end() \|\|

482 cache_iter->first.length() > best_prefix->first.length()))	524 cache_iter->first.length() > best_prefix->first.length()))

483 best_prefix = cache_iter;	525 best_prefix = cache_iter;

484 }	526 }

(...skipping 25 matching lines...) Expand all Loading...
510	552

511 // Filter for each remaining, unique character in the term.	553 // Filter for each remaining, unique character in the term.

512 Char16Set leftover_chars = Char16SetFromString16(leftovers);	554 Char16Set leftover_chars = Char16SetFromString16(leftovers);

513 Char16Set unique_chars;	555 Char16Set unique_chars;

514 std::set_difference(leftover_chars.begin(), leftover_chars.end(),	556 std::set_difference(leftover_chars.begin(), leftover_chars.end(),

515 prefix_chars.begin(), prefix_chars.end(),	557 prefix_chars.begin(), prefix_chars.end(),

516 std::inserter(unique_chars, unique_chars.begin()));	558 std::inserter(unique_chars, unique_chars.begin()));

517	559

518 // Reduce the word set with any leftover, unprocessed characters.	560 // Reduce the word set with any leftover, unprocessed characters.

519 if (!unique_chars.empty()) {	561 if (!unique_chars.empty()) {

520 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));	562 WordIDSet leftover_set(

	563 private_data_->WordIDSetForTermChars(unique_chars));

521 // We might come up empty on the leftovers.	564 // We might come up empty on the leftovers.

522 if (leftover_set.empty()) {	565 if (leftover_set.empty()) {

523 search_term_cache_[term] = SearchTermCacheItem();	566 search_term_cache_[term] = SearchTermCacheItem();

524 return HistoryIDSet();	567 return HistoryIDSet();

525 }	568 }

526 // Or there may not have been a prefix from which to start.	569 // Or there may not have been a prefix from which to start.

527 if (prefix_chars.empty()) {	570 if (prefix_chars.empty()) {

528 word_id_set.swap(leftover_set);	571 word_id_set.swap(leftover_set);

529 } else {	572 } else {

530 WordIDSet new_word_id_set;	573 WordIDSet new_word_id_set;

531 std::set_intersection(word_id_set.begin(), word_id_set.end(),	574 std::set_intersection(word_id_set.begin(), word_id_set.end(),

532 leftover_set.begin(), leftover_set.end(),	575 leftover_set.begin(), leftover_set.end(),

533 std::inserter(new_word_id_set,	576 std::inserter(new_word_id_set,

534 new_word_id_set.begin()));	577 new_word_id_set.begin()));

535 word_id_set.swap(new_word_id_set);	578 word_id_set.swap(new_word_id_set);

536 }	579 }

537 }	580 }

538	581

539 // We must filter the word list because the resulting word set surely	582 // We must filter the word list because the resulting word set surely

540 // contains words which do not have the search term as a proper subset.	583 // contains words which do not have the search term as a proper subset.

541 for (WordIDSet::iterator word_set_iter = word_id_set.begin();	584 for (WordIDSet::iterator word_set_iter = word_id_set.begin();

542 word_set_iter != word_id_set.end(); ) {	585 word_set_iter != word_id_set.end(); ) {

543 if (word_list_[*word_set_iter].find(term) == string16::npos)	586 if (private_data_->word_list_[*word_set_iter].find(term) ==

	587 string16::npos)

544 word_id_set.erase(word_set_iter++);	588 word_id_set.erase(word_set_iter++);

545 else	589 else

546 ++word_set_iter;	590 ++word_set_iter;

547 }	591 }

548 } else {	592 } else {

549 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));	593 word_id_set =

	594 private_data_->WordIDSetForTermChars(Char16SetFromString16(term));

550 }	595 }

551	596

552 // If any words resulted then we can compose a set of history IDs by unioning	597 // If any words resulted then we can compose a set of history IDs by unioning

553 // the sets from each word.	598 // the sets from each word.

554 HistoryIDSet history_id_set;	599 HistoryIDSet history_id_set;

555 if (!word_id_set.empty()) {	600 if (!word_id_set.empty()) {

556 for (WordIDSet::iterator word_id_iter = word_id_set.begin();	601 for (WordIDSet::iterator word_id_iter = word_id_set.begin();

557 word_id_iter != word_id_set.end(); ++word_id_iter) {	602 word_id_iter != word_id_set.end(); ++word_id_iter) {

558 WordID word_id = *word_id_iter;	603 WordID word_id = *word_id_iter;

559 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);	604 WordIDHistoryMap::iterator word_iter =

560 if (word_iter != word_id_history_map_.end()) {	605 private_data_->word_id_history_map_.find(word_id);

	606 if (word_iter != private_data_->word_id_history_map_.end()) {

561 HistoryIDSet& word_history_id_set(word_iter->second);	607 HistoryIDSet& word_history_id_set(word_iter->second);

562 history_id_set.insert(word_history_id_set.begin(),	608 history_id_set.insert(word_history_id_set.begin(),

563 word_history_id_set.end());	609 word_history_id_set.end());

564 }	610 }

565 }	611 }

566 }	612 }

567	613

568 // Record a new cache entry for this word if the term is longer than	614 // Record a new cache entry for this word if the term is longer than

569 // a single character.	615 // a single character.

570 if (term_length > 1)	616 if (term_length > 1)

571 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);	617 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);

572	618

573 return history_id_set;	619 return history_id_set;

574 }	620 }

575	621

576 // Utility Functions	622 // Utility Functions

577	623

578 // static

579 InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16(

580 const string16& uni_string) {

581 const size_t kMaxWordLength = 64;

582 String16Vector words = WordVectorFromString16(uni_string, false);

583 String16Set word_set;

584 for (String16Vector::const_iterator iter = words.begin(); iter != words.end();

585 ++iter)

586 word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength));

587 return word_set;

588 }

589

590 // static

591 InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16(

592 const string16& uni_string,

593 bool break_on_space) {

594 base::i18n::BreakIterator iter(

595 uni_string,

596 break_on_space ? base::i18n::BreakIterator::BREAK_SPACE

597 : base::i18n::BreakIterator::BREAK_WORD);

598 String16Vector words;

599 if (!iter.Init())

600 return words;

601 while (iter.Advance()) {

602 if (break_on_space \|\| iter.IsWord()) {

603 string16 word = iter.GetString();

604 if (break_on_space)

605 TrimWhitespace(word, TRIM_ALL, &word);

606 if (!word.empty())

607 words.push_back(word);

608 }

609 }

610 return words;

611 }

612

613 // static

614 InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16(

615 const string16& term) {

616 Char16Set characters;

617 for (string16::const_iterator iter = term.begin(); iter != term.end();

618 ++iter)

619 characters.insert(*iter);

620 return characters;

621 }

622

623 void InMemoryURLIndex::AddWordToIndex(const string16& term,	624 void InMemoryURLIndex::AddWordToIndex(const string16& term,

624 HistoryID history_id) {	625 HistoryID history_id) {

625 WordMap::iterator word_pos = word_map_.find(term);	626 WordMap::iterator word_pos = private_data_->word_map_.find(term);

626 if (word_pos != word_map_.end())	627 if (word_pos != private_data_->word_map_.end())

627 UpdateWordHistory(word_pos->second, history_id);	628 UpdateWordHistory(word_pos->second, history_id);

628 else	629 else

629 AddWordHistory(term, history_id);	630 AddWordHistory(term, history_id);

630 }	631 }

631	632

632 void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) {	633 void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) {

633 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);	634 WordIDHistoryMap::iterator history_pos =

634 DCHECK(history_pos != word_id_history_map_.end());	635 private_data_->word_id_history_map_.find(word_id);

635 HistoryIDSet& history_id_set(history_pos->second);	636 DCHECK(history_pos != private_data_->word_id_history_map_.end());

636 history_id_set.insert(history_id);	637 HistoryIDSet& history_id_set(history_pos->second);

	638 history_id_set.insert(history_id);

	639 private_data_->AddToHistoryIDWordMap(history_id, word_id);

637 }	640 }

638	641

639 // Add a new word to the word list and the word map, and then create a	642 // Add a new word to the word list and the word map, and then create a

640 // new entry in the word/history map.	643 // new entry in the word/history map.

641 void InMemoryURLIndex::AddWordHistory(const string16& term,	644 void InMemoryURLIndex::AddWordHistory(const string16& term,

642 HistoryID history_id) {	645 HistoryID history_id) {

643 word_list_.push_back(term);	646 URLIndexPrivateData& private_data(*(private_data_.get()));

644 WordID word_id = word_list_.size() - 1;	647 WordID word_id = private_data.word_list_.size();

645 word_map_[term] = word_id;	648 if (private_data.available_words_.empty()) {

	649 private_data.word_list_.push_back(term);

	650 } else {

	651 word_id = *(private_data.available_words_.begin());

	652 private_data.word_list_[word_id] = term;

	653 private_data.available_words_.erase(word_id);

	654 }

	655 private_data.word_map_[term] = word_id;

	656

646 HistoryIDSet history_id_set;	657 HistoryIDSet history_id_set;

647 history_id_set.insert(history_id);	658 history_id_set.insert(history_id);

648 word_id_history_map_[word_id] = history_id_set;	659 private_data.word_id_history_map_[word_id] = history_id_set;

	660 private_data.AddToHistoryIDWordMap(history_id, word_id);

	661

649 // For each character in the newly added word (i.e. a word that is not	662 // For each character in the newly added word (i.e. a word that is not

650 // already in the word index), add the word to the character index.	663 // already in the word index), add the word to the character index.

651 Char16Set characters = Char16SetFromString16(term);	664 Char16Set characters = Char16SetFromString16(term);

652 for (Char16Set::iterator uni_char_iter = characters.begin();	665 for (Char16Set::iterator uni_char_iter = characters.begin();

653 uni_char_iter != characters.end(); ++uni_char_iter) {	666 uni_char_iter != characters.end(); ++uni_char_iter) {

654 char16 uni_char = *uni_char_iter;	667 char16 uni_char = *uni_char_iter;

655 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);	668 CharWordIDMap::iterator char_iter =

656 if (char_iter != char_word_map_.end()) {	669 private_data.char_word_map_.find(uni_char);

	670 if (char_iter != private_data.char_word_map_.end()) {

657 // Update existing entry in the char/word index.	671 // Update existing entry in the char/word index.

658 WordIDSet& word_id_set(char_iter->second);	672 WordIDSet& word_id_set(char_iter->second);

659 word_id_set.insert(word_id);	673 word_id_set.insert(word_id);

660 } else {	674 } else {

661 // Create a new entry in the char/word index.	675 // Create a new entry in the char/word index.

662 WordIDSet word_id_set;	676 WordIDSet word_id_set;

663 word_id_set.insert(word_id);	677 word_id_set.insert(word_id);

664 char_word_map_[uni_char] = word_id_set;	678 private_data.char_word_map_[uni_char] = word_id_set;

665 }	679 }

666 }	680 }

667 }	681 }

668	682

669 InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars(

670 const Char16Set& term_chars) {

671 WordIDSet word_id_set;

672 for (Char16Set::const_iterator c_iter = term_chars.begin();

673 c_iter != term_chars.end(); ++c_iter) {

674 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);

675 if (char_iter == char_word_map_.end()) {

676 // A character was not found so there are no matching results: bail.

677 word_id_set.clear();

678 break;

679 }

680 WordIDSet& char_word_id_set(char_iter->second);

681 // It is possible for there to no longer be any words associated with

682 // a particular character. Give up in that case.

683 if (char_word_id_set.empty()) {

684 word_id_set.clear();

685 break;

686 }

687

688 if (c_iter == term_chars.begin()) {

689 // First character results becomes base set of results.

690 word_id_set = char_word_id_set;

691 } else {

692 // Subsequent character results get intersected in.

693 WordIDSet new_word_id_set;

694 std::set_intersection(word_id_set.begin(), word_id_set.end(),

695 char_word_id_set.begin(), char_word_id_set.end(),

696 std::inserter(new_word_id_set,

697 new_word_id_set.begin()));

698 word_id_set.swap(new_word_id_set);

699 }

700 }

701 return word_id_set;

702 }

703

704 // static	683 // static

705 TermMatches InMemoryURLIndex::MatchTermInString(const string16& term,	684 // TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch.

706 const string16& string,

707 int term_num) {

708 const size_t kMaxCompareLength = 2048;

709 const string16& short_string = (string.length() > kMaxCompareLength) ?

710 string.substr(0, kMaxCompareLength) : string;

711 TermMatches matches;

712 for (size_t location = short_string.find(term); location != string16::npos;

713 location = short_string.find(term, location + 1)) {

714 matches.push_back(TermMatch(term_num, location, term.length()));

715 }

716 return matches;

717 }

718

719 // static

720 TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) {

721 if (matches.empty())

722 return matches;

723 TermMatches sorted_matches = matches;

724 std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess);

725 TermMatches clean_matches;

726 TermMatch last_match = sorted_matches[0];

727 clean_matches.push_back(last_match);

728 for (TermMatches::const_iterator iter = sorted_matches.begin() + 1;

729 iter != sorted_matches.end(); ++iter) {

730 if (iter->offset >= last_match.offset + last_match.length) {

731 last_match = *iter;

732 clean_matches.push_back(last_match);

733 }

734 }

735 return clean_matches;

736 }

737

738 // static

739 std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches(

740 const TermMatches& matches) {

741 std::vector<size_t> offsets;

742 for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i)

743 offsets.push_back(i->offset);

744 return offsets;

745 }

746

747 // static

748 TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches(

749 const TermMatches& matches,

750 const std::vector<size_t>& offsets) {

751 DCHECK_EQ(matches.size(), offsets.size());

752 TermMatches new_matches;

753 std::vector<size_t>::const_iterator offset_iter = offsets.begin();

754 for (TermMatches::const_iterator term_iter = matches.begin();

755 term_iter != matches.end(); ++term_iter, ++offset_iter) {

756 if (*offset_iter != string16::npos) {

757 TermMatch new_match(*term_iter);

758 new_match.offset = *offset_iter;

759 new_matches.push_back(new_match);

760 }

761 }

762 return new_matches;

763 }

764

765 // static

766 ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(	685 ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(

767 const URLRow& row,	686 const URLRow& row,

768 const String16Vector& terms) {	687 const String16Vector& terms) {

769 ScoredHistoryMatch match(row);	688 ScoredHistoryMatch match(row);

770 GURL gurl = row.url();	689 GURL gurl = row.url();

771 if (!gurl.is_valid())	690 if (!gurl.is_valid())

772 return match;	691 return match;

773	692

774 // Figure out where each search term appears in the URL and/or page title	693 // Figure out where each search term appears in the URL and/or page title

775 // so that we can score as well as provide autocomplete highlighting.	694 // so that we can score as well as provide autocomplete highlighting.

776 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec()));	695 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec()));

777 string16 title = base::i18n::ToLower(row.title());	696 string16 title = base::i18n::ToLower(row.title());

778 int term_num = 0;	697 int term_num = 0;

779 for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end();	698 for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end();

780 ++iter, ++term_num) {	699 ++iter, ++term_num) {

781 string16 term = *iter;	700 string16 term = *iter;

782 TermMatches url_term_matches = MatchTermInString(term, url, term_num);	701 TermMatches url_term_matches = MatchTermInString(term, url, term_num);

783 TermMatches title_term_matches = MatchTermInString(term, title, term_num);	702 TermMatches title_term_matches = MatchTermInString(term, title, term_num);

784 if (url_term_matches.empty() && title_term_matches.empty())	703 if (url_term_matches.empty() && title_term_matches.empty())

785 return match; // A term was not found in either URL or title - reject.	704 return match; // A term was not found in either URL or title - reject.

786 match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(),	705 match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(),

787 url_term_matches.end());	706 url_term_matches.end());

788 match.title_matches.insert(match.title_matches.end(),	707 match.title_matches.insert(match.title_matches.end(),

789 title_term_matches.begin(),	708 title_term_matches.begin(),

790 title_term_matches.end());	709 title_term_matches.end());

791 }	710 }

792	711

793 // Sort matches by offset and eliminate any which overlap.	712 // Sort matches by offset and eliminate any which overlap.

794 match.url_matches = SortAndDeoverlap(match.url_matches);	713 match.url_matches = SortAndDeoverlapMatches(match.url_matches);

795 match.title_matches = SortAndDeoverlap(match.title_matches);	714 match.title_matches = SortAndDeoverlapMatches(match.title_matches);

796	715

797 // We should not (currently) inline autocomplete a result unless both of the	716 // We should not (currently) inline autocomplete a result unless both of the

798 // following are true:	717 // following are true:

799 // * There is exactly one substring matches in the URL, and	718 // * There is exactly one substring matches in the URL, and

800 // * The one URL match starts at the beginning of the URL.	719 // * The one URL match starts at the beginning of the URL.

801 match.can_inline =	720 match.can_inline =

802 match.url_matches.size() == 1 && match.url_matches[0].offset == 0;	721 match.url_matches.size() == 1 && match.url_matches[0].offset == 0;

803	722

804 // Get partial scores based on term matching. Note that the score for	723 // Get partial scores based on term matching. Note that the score for

805 // each of the URL and title are adjusted by the fraction of the	724 // each of the URL and title are adjusted by the fraction of the

(...skipping 93 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
899	818

900 // Scale the sum of the three components above into a single score component	819 // Scale the sum of the three components above into a single score component

901 // on the same scale as that used in ScoredMatchForURL().	820 // on the same scale as that used in ScoredMatchForURL().

902 return ScoreForValue(raw_score, kTermScoreLevel);	821 return ScoreForValue(raw_score, kTermScoreLevel);

903 }	822 }

904	823

905 InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch(	824 InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch(

906 const InMemoryURLIndex& index,	825 const InMemoryURLIndex& index,

907 const String16Vector& lower_terms)	826 const String16Vector& lower_terms)

908 : index_(index),	827 : index_(index),

909 lower_terms_(lower_terms) {	828 lower_terms_(lower_terms) {}

910 }

911	829

912 InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {}	830 InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {}

913	831

914 void InMemoryURLIndex::AddHistoryMatch::operator()(	832 void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) {

915 const InMemoryURLIndex::HistoryID history_id) {

916 HistoryInfoMap::const_iterator hist_pos =	833 HistoryInfoMap::const_iterator hist_pos =

917 index_.history_info_map_.find(history_id);	834 index_.private_data_->history_info_map_.find(history_id);

918 // Note that a history_id may be present in the word_id_history_map_ yet not	835 // Note that a history_id may be present in the word_id_history_map_ yet not

919 // be found in the history_info_map_. This occurs when an item has been	836 // be found in the history_info_map_. This occurs when an item has been

920 // deleted by the user or the item no longer qualifies as a quick result.	837 // deleted by the user or the item no longer qualifies as a quick result.

921 if (hist_pos != index_.history_info_map_.end()) {	838 if (hist_pos != index_.private_data_->history_info_map_.end()) {

922 const URLRow& hist_item = hist_pos->second;	839 const URLRow& hist_item = hist_pos->second;

923 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_));	840 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_));

924 if (match.raw_score > 0)	841 if (match.raw_score > 0)

925 scored_matches_.push_back(match);	842 scored_matches_.push_back(match);

926 }	843 }

927 }	844 }

928	845

929 bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) {	846 bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) {

930 if (history_dir_.empty())	847 if (history_dir_.empty())

931 return false;	848 return false;

932 *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache"));	849 *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache"));

933 return true;	850 return true;

934 }	851 }

935	852

936 bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const {	853 bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const {

937 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end();	854 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end();

938 }	855 }

939	856

940 void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const {	857 void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const {

941 DCHECK(cache);	858 DCHECK(cache);

942 cache->set_timestamp(base::Time::Now().ToInternalValue());	859 cache->set_timestamp(base::Time::Now().ToInternalValue());

943 cache->set_history_item_count(history_item_count_);	860 // history_item_count_ is no longer used but rather than change the protobuf

	861 // definition use a placeholder. This will go away with the switch to SQLite.

	862 cache->set_history_item_count(0);

944 SaveWordList(cache);	863 SaveWordList(cache);

945 SaveWordMap(cache);	864 SaveWordMap(cache);

946 SaveCharWordMap(cache);	865 SaveCharWordMap(cache);

947 SaveWordIDHistoryMap(cache);	866 SaveWordIDHistoryMap(cache);

948 SaveHistoryInfoMap(cache);	867 SaveHistoryInfoMap(cache);

949 }	868 }

950	869

951 bool InMemoryURLIndex::RestorePrivateData(	870 bool InMemoryURLIndex::RestorePrivateData(

952 const InMemoryURLIndexCacheItem& cache) {	871 const InMemoryURLIndexCacheItem& cache) {

953 last_saved_ = base::Time::FromInternalValue(cache.timestamp());	872 last_saved_ = base::Time::FromInternalValue(cache.timestamp());

954 history_item_count_ = cache.history_item_count();	873 return RestoreWordList(cache) && RestoreWordMap(cache) &&

955 return (history_item_count_ == 0) \|\| (RestoreWordList(cache) &&	874 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&

956 RestoreWordMap(cache) && RestoreCharWordMap(cache) &&	875 RestoreHistoryInfoMap(cache);

957 RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache));

958 }	876 }

959	877

960

961 void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const {	878 void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const {

962 if (word_list_.empty())	879 if (private_data_->word_list_.empty())

963 return;	880 return;

964 WordListItem* list_item = cache->mutable_word_list();	881 WordListItem* list_item = cache->mutable_word_list();

965 list_item->set_word_count(word_list_.size());	882 list_item->set_word_count(private_data_->word_list_.size());

966 for (String16Vector::const_iterator iter = word_list_.begin();	883 for (String16Vector::const_iterator iter = private_data_->word_list_.begin();

967 iter != word_list_.end(); ++iter)	884 iter != private_data_->word_list_.end(); ++iter)

968 list_item->add_word(UTF16ToUTF8(*iter));	885 list_item->add_word(UTF16ToUTF8(*iter));

969 }	886 }

970	887

971 bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) {	888 bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) {

972 if (!cache.has_word_list())	889 if (!cache.has_word_list())

973 return false;	890 return false;

974 const WordListItem& list_item(cache.word_list());	891 const WordListItem& list_item(cache.word_list());

975 uint32 expected_item_count = list_item.word_count();	892 uint32 expected_item_count = list_item.word_count();

976 uint32 actual_item_count = list_item.word_size();	893 uint32 actual_item_count = list_item.word_size();

977 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)	894 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

978 return false;	895 return false;

979 const RepeatedPtrField<std::string>& words(list_item.word());	896 const RepeatedPtrField<std::string>& words(list_item.word());

980 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();	897 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();

981 iter != words.end(); ++iter)	898 iter != words.end(); ++iter)

982 word_list_.push_back(UTF8ToUTF16(*iter));	899 private_data_->word_list_.push_back(UTF8ToUTF16(*iter));

983 return true;	900 return true;

984 }	901 }

985	902

986 void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {	903 void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {

987 if (word_map_.empty())	904 if (private_data_->word_map_.empty())

988 return;	905 return;

989 WordMapItem* map_item = cache->mutable_word_map();	906 WordMapItem* map_item = cache->mutable_word_map();

990 map_item->set_item_count(word_map_.size());	907 map_item->set_item_count(private_data_->word_map_.size());

991 for (WordMap::const_iterator iter = word_map_.begin();	908 for (WordMap::const_iterator iter = private_data_->word_map_.begin();

992 iter != word_map_.end(); ++iter) {	909 iter != private_data_->word_map_.end(); ++iter) {

993 WordMapEntry* map_entry = map_item->add_word_map_entry();	910 WordMapEntry* map_entry = map_item->add_word_map_entry();

994 map_entry->set_word(UTF16ToUTF8(iter->first));	911 map_entry->set_word(UTF16ToUTF8(iter->first));

995 map_entry->set_word_id(iter->second);	912 map_entry->set_word_id(iter->second);

996 }	913 }

997 }	914 }

998	915

999 bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) {	916 bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) {

1000 if (!cache.has_word_map())	917 if (!cache.has_word_map())

1001 return false;	918 return false;

1002 const WordMapItem& list_item(cache.word_map());	919 const WordMapItem& list_item(cache.word_map());

1003 uint32 expected_item_count = list_item.item_count();	920 uint32 expected_item_count = list_item.item_count();

1004 uint32 actual_item_count = list_item.word_map_entry_size();	921 uint32 actual_item_count = list_item.word_map_entry_size();

1005 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)	922 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1006 return false;	923 return false;

1007 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());	924 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());

1008 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();	925 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();

1009 iter != entries.end(); ++iter)	926 iter != entries.end(); ++iter)

1010 word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();	927 private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();

1011 return true;	928 return true;

1012 }	929 }

1013	930

1014 void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const {	931 void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const {

1015 if (char_word_map_.empty())	932 if (private_data_->char_word_map_.empty())

1016 return;	933 return;

1017 CharWordMapItem* map_item = cache->mutable_char_word_map();	934 CharWordMapItem* map_item = cache->mutable_char_word_map();

1018 map_item->set_item_count(char_word_map_.size());	935 map_item->set_item_count(private_data_->char_word_map_.size());

1019 for (CharWordIDMap::const_iterator iter = char_word_map_.begin();	936 for (CharWordIDMap::const_iterator iter =

1020 iter != char_word_map_.end(); ++iter) {	937 private_data_->char_word_map_.begin();

	938 iter != private_data_->char_word_map_.end(); ++iter) {

1021 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();	939 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();

1022 map_entry->set_char_16(iter->first);	940 map_entry->set_char_16(iter->first);

1023 const WordIDSet& word_id_set(iter->second);	941 const WordIDSet& word_id_set(iter->second);

1024 map_entry->set_item_count(word_id_set.size());	942 map_entry->set_item_count(word_id_set.size());

1025 for (WordIDSet::const_iterator set_iter = word_id_set.begin();	943 for (WordIDSet::const_iterator set_iter = word_id_set.begin();

1026 set_iter != word_id_set.end(); ++set_iter)	944 set_iter != word_id_set.end(); ++set_iter)

1027 map_entry->add_word_id(*set_iter);	945 map_entry->add_word_id(*set_iter);

1028 }	946 }

1029 }	947 }

1030	948

(...skipping 13 matching lines...) Expand all Loading...
1044 expected_item_count = iter->item_count();	962 expected_item_count = iter->item_count();

1045 actual_item_count = iter->word_id_size();	963 actual_item_count = iter->word_id_size();

1046 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)	964 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1047 return false;	965 return false;

1048 char16 uni_char = static_cast<char16>(iter->char_16());	966 char16 uni_char = static_cast<char16>(iter->char_16());

1049 WordIDSet word_id_set;	967 WordIDSet word_id_set;

1050 const RepeatedField<int32>& word_ids(iter->word_id());	968 const RepeatedField<int32>& word_ids(iter->word_id());

1051 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();	969 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();

1052 jiter != word_ids.end(); ++jiter)	970 jiter != word_ids.end(); ++jiter)

1053 word_id_set.insert(*jiter);	971 word_id_set.insert(*jiter);

1054 char_word_map_[uni_char] = word_id_set;	972 private_data_->char_word_map_[uni_char] = word_id_set;

1055 }	973 }

1056 return true;	974 return true;

1057 }	975 }

1058	976

1059 void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache)	977 void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache)

1060 const {	978 const {

1061 if (word_id_history_map_.empty())	979 if (private_data_->word_id_history_map_.empty())

1062 return;	980 return;

1063 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();	981 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();

1064 map_item->set_item_count(word_id_history_map_.size());	982 map_item->set_item_count(private_data_->word_id_history_map_.size());

1065 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();	983 for (WordIDHistoryMap::const_iterator iter =

1066 iter != word_id_history_map_.end(); ++iter) {	984 private_data_->word_id_history_map_.begin();

	985 iter != private_data_->word_id_history_map_.end(); ++iter) {

1067 WordIDHistoryMapEntry* map_entry =	986 WordIDHistoryMapEntry* map_entry =

1068 map_item->add_word_id_history_map_entry();	987 map_item->add_word_id_history_map_entry();

1069 map_entry->set_word_id(iter->first);	988 map_entry->set_word_id(iter->first);

1070 const HistoryIDSet& history_id_set(iter->second);	989 const HistoryIDSet& history_id_set(iter->second);

1071 map_entry->set_item_count(history_id_set.size());	990 map_entry->set_item_count(history_id_set.size());

1072 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();	991 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();

1073 set_iter != history_id_set.end(); ++set_iter)	992 set_iter != history_id_set.end(); ++set_iter)

1074 map_entry->add_history_id(*set_iter);	993 map_entry->add_history_id(*set_iter);

1075 }	994 }

1076 }	995 }

(...skipping 12 matching lines...) Expand all Loading...
1089 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =	1008 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =

1090 entries.begin(); iter != entries.end(); ++iter) {	1009 entries.begin(); iter != entries.end(); ++iter) {

1091 expected_item_count = iter->item_count();	1010 expected_item_count = iter->item_count();

1092 actual_item_count = iter->history_id_size();	1011 actual_item_count = iter->history_id_size();

1093 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)	1012 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1094 return false;	1013 return false;

1095 WordID word_id = iter->word_id();	1014 WordID word_id = iter->word_id();

1096 HistoryIDSet history_id_set;	1015 HistoryIDSet history_id_set;

1097 const RepeatedField<int64>& history_ids(iter->history_id());	1016 const RepeatedField<int64>& history_ids(iter->history_id());

1098 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();	1017 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();

1099 jiter != history_ids.end(); ++jiter)	1018 jiter != history_ids.end(); ++jiter) {

1100 history_id_set.insert(*jiter);	1019 history_id_set.insert(*jiter);

1101 word_id_history_map_[word_id] = history_id_set;	1020 private_data_->AddToHistoryIDWordMap(*jiter, word_id);

	1021 }

	1022 private_data_->word_id_history_map_[word_id] = history_id_set;

1102 }	1023 }

1103 return true;	1024 return true;

1104 }	1025 }

1105	1026

1106 void InMemoryURLIndex::SaveHistoryInfoMap(	1027 void InMemoryURLIndex::SaveHistoryInfoMap(

1107 InMemoryURLIndexCacheItem* cache) const {	1028 InMemoryURLIndexCacheItem* cache) const {

1108 if (history_info_map_.empty())	1029 if (private_data_->history_info_map_.empty())

1109 return;	1030 return;

1110 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();	1031 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();

1111 map_item->set_item_count(history_info_map_.size());	1032 map_item->set_item_count(private_data_->history_info_map_.size());

1112 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();	1033 for (HistoryInfoMap::const_iterator iter =

1113 iter != history_info_map_.end(); ++iter) {	1034 private_data_->history_info_map_.begin();

	1035 iter != private_data_->history_info_map_.end(); ++iter) {

1114 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();	1036 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();

1115 map_entry->set_history_id(iter->first);	1037 map_entry->set_history_id(iter->first);

1116 const URLRow& url_row(iter->second);	1038 const URLRow& url_row(iter->second);

1117 // Note: We only save information that contributes to the index so there	1039 // Note: We only save information that contributes to the index so there

1118 // is no need to save search_term_cache_ (not persistent),	1040 // is no need to save search_term_cache_ (not persistent),

1119 // languages_, etc.	1041 // languages_, etc.

1120 map_entry->set_visit_count(url_row.visit_count());	1042 map_entry->set_visit_count(url_row.visit_count());

1121 map_entry->set_typed_count(url_row.typed_count());	1043 map_entry->set_typed_count(url_row.typed_count());

1122 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());	1044 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());

1123 map_entry->set_url(url_row.url().spec());	1045 map_entry->set_url(url_row.url().spec());

(...skipping 17 matching lines...) Expand all Loading...
1141 HistoryID history_id = iter->history_id();	1063 HistoryID history_id = iter->history_id();

1142 GURL url(iter->url());	1064 GURL url(iter->url());

1143 URLRow url_row(url, history_id);	1065 URLRow url_row(url, history_id);

1144 url_row.set_visit_count(iter->visit_count());	1066 url_row.set_visit_count(iter->visit_count());

1145 url_row.set_typed_count(iter->typed_count());	1067 url_row.set_typed_count(iter->typed_count());

1146 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));	1068 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));

1147 if (iter->has_title()) {	1069 if (iter->has_title()) {

1148 string16 title(UTF8ToUTF16(iter->title()));	1070 string16 title(UTF8ToUTF16(iter->title()));

1149 url_row.set_title(title);	1071 url_row.set_title(title);

1150 }	1072 }

1151 history_info_map_[history_id] = url_row;	1073 private_data_->history_info_map_[history_id] = url_row;

1152 }	1074 }

1153 return true;	1075 return true;

1154 }	1076 }

1155	1077

1156 } // namespace history	1078 } // namespace history

OLD	NEW

« no previous file with comments | « chrome/browser/history/in_memory_url_index.h ('k') | chrome/browser/history/in_memory_url_index_types.h » ('j') | no next file with comments »