OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/autocomplete/url_index_private_data.h" | 5 #include "chrome/browser/autocomplete/url_index_private_data.h" |
6 | 6 |
7 #include <functional> | 7 #include <functional> |
8 #include <iterator> | 8 #include <iterator> |
9 #include <limits> | 9 #include <limits> |
10 #include <numeric> | 10 #include <numeric> |
11 #include <string> | 11 #include <string> |
12 #include <vector> | 12 #include <vector> |
13 | 13 |
14 #include "base/basictypes.h" | 14 #include "base/basictypes.h" |
15 #include "base/files/file_util.h" | 15 #include "base/files/file_util.h" |
16 #include "base/i18n/break_iterator.h" | 16 #include "base/i18n/break_iterator.h" |
17 #include "base/i18n/case_conversion.h" | 17 #include "base/i18n/case_conversion.h" |
18 #include "base/metrics/histogram.h" | 18 #include "base/metrics/histogram.h" |
19 #include "base/strings/string_util.h" | 19 #include "base/strings/string_util.h" |
20 #include "base/strings/utf_string_conversions.h" | 20 #include "base/strings/utf_string_conversions.h" |
21 #include "base/time/time.h" | 21 #include "base/time/time.h" |
22 #include "chrome/browser/autocomplete/in_memory_url_index.h" | 22 #include "chrome/browser/autocomplete/in_memory_url_index.h" |
| 23 #include "components/bookmarks/browser/bookmark_model.h" |
23 #include "components/bookmarks/browser/bookmark_utils.h" | 24 #include "components/bookmarks/browser/bookmark_utils.h" |
24 #include "components/history/core/browser/history_database.h" | 25 #include "components/history/core/browser/history_database.h" |
25 #include "components/history/core/browser/history_db_task.h" | 26 #include "components/history/core/browser/history_db_task.h" |
26 #include "components/history/core/browser/history_service.h" | 27 #include "components/history/core/browser/history_service.h" |
27 #include "net/base/net_util.h" | 28 #include "net/base/net_util.h" |
28 | 29 |
29 #if defined(USE_SYSTEM_PROTOBUF) | 30 #if defined(USE_SYSTEM_PROTOBUF) |
30 #include <google/protobuf/repeated_field.h> | 31 #include <google/protobuf/repeated_field.h> |
31 #else | 32 #else |
32 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" | 33 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" |
33 #endif | 34 #endif |
34 | 35 |
35 using google::protobuf::RepeatedField; | 36 using google::protobuf::RepeatedField; |
36 using google::protobuf::RepeatedPtrField; | 37 using google::protobuf::RepeatedPtrField; |
37 using in_memory_url_index::InMemoryURLIndexCacheItem; | 38 using in_memory_url_index::InMemoryURLIndexCacheItem; |
38 | 39 |
39 namespace { | 40 namespace { |
40 static const size_t kMaxVisitsToStoreInCache = 10u; | 41 static const size_t kMaxVisitsToStoreInCache = 10u; |
41 } // anonymous namespace | 42 } |
42 | 43 |
43 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem | 44 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem |
44 WordListItem; | 45 WordListItem; |
45 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry | 46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry |
46 WordMapEntry; | 47 WordMapEntry; |
47 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; | 48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; |
48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem | 49 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem |
49 CharWordMapItem; | 50 CharWordMapItem; |
50 typedef in_memory_url_index:: | 51 typedef in_memory_url_index:: |
51 InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry CharWordMapEntry; | 52 InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry CharWordMapEntry; |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
145 pre_filter_item_count_(0), | 146 pre_filter_item_count_(0), |
146 post_filter_item_count_(0), | 147 post_filter_item_count_(0), |
147 post_scoring_item_count_(0) { | 148 post_scoring_item_count_(0) { |
148 } | 149 } |
149 | 150 |
150 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( | 151 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( |
151 base::string16 search_string, | 152 base::string16 search_string, |
152 size_t cursor_position, | 153 size_t cursor_position, |
153 size_t max_matches, | 154 size_t max_matches, |
154 const std::string& languages, | 155 const std::string& languages, |
155 const ScoredHistoryMatch::Builder& builder) { | 156 bookmarks::BookmarkModel* bookmark_model) { |
156 // If cursor position is set and useful (not at either end of the | 157 // If cursor position is set and useful (not at either end of the |
157 // string), allow the search string to be broken at cursor position. | 158 // string), allow the search string to be broken at cursor position. |
158 // We do this by pretending there's a space where the cursor is. | 159 // We do this by pretending there's a space where the cursor is. |
159 if ((cursor_position != base::string16::npos) && | 160 if ((cursor_position != base::string16::npos) && |
160 (cursor_position < search_string.length()) && | 161 (cursor_position < search_string.length()) && |
161 (cursor_position > 0)) { | 162 (cursor_position > 0)) { |
162 search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); | 163 search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); |
163 } | 164 } |
164 pre_filter_item_count_ = 0; | 165 pre_filter_item_count_ = 0; |
165 post_filter_item_count_ = 0; | 166 post_filter_item_count_ = 0; |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
239 // for in the database allows some whitespace "words" whereas Tokenize | 240 // for in the database allows some whitespace "words" whereas Tokenize |
240 // excludes a long list of whitespace.) One could write a scoring | 241 // excludes a long list of whitespace.) One could write a scoring |
241 // function that gives a reasonable order to matches when there | 242 // function that gives a reasonable order to matches when there |
242 // are no terms (i.e., all the words are some form of whitespace), | 243 // are no terms (i.e., all the words are some form of whitespace), |
243 // but this is such a rare edge case that it's not worth the time. | 244 // but this is such a rare edge case that it's not worth the time. |
244 return scored_items; | 245 return scored_items; |
245 } | 246 } |
246 scored_items = | 247 scored_items = |
247 std::for_each( | 248 std::for_each( |
248 history_id_set.begin(), history_id_set.end(), | 249 history_id_set.begin(), history_id_set.end(), |
249 AddHistoryMatch(*this, languages, lower_raw_string, lower_raw_terms, | 250 AddHistoryMatch(bookmark_model, *this, languages, lower_raw_string, |
250 base::Time::Now(), builder)).ScoredMatches(); | 251 lower_raw_terms, base::Time::Now())).ScoredMatches(); |
251 | 252 |
252 // Select and sort only the top |max_matches| results. | 253 // Select and sort only the top |max_matches| results. |
253 if (scored_items.size() > max_matches) { | 254 if (scored_items.size() > max_matches) { |
254 std::partial_sort(scored_items.begin(), | 255 std::partial_sort(scored_items.begin(), |
255 scored_items.begin() + | 256 scored_items.begin() + |
256 max_matches, | 257 max_matches, |
257 scored_items.end(), | 258 scored_items.end(), |
258 ScoredHistoryMatch::MatchScoreGreater); | 259 ScoredHistoryMatch::MatchScoreGreater); |
259 scored_items.resize(max_matches); | 260 scored_items.resize(max_matches); |
260 } else { | 261 } else { |
(...skipping 986 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1247 const std::set<std::string>& whitelist) { | 1248 const std::set<std::string>& whitelist) { |
1248 return whitelist.find(gurl.scheme()) != whitelist.end(); | 1249 return whitelist.find(gurl.scheme()) != whitelist.end(); |
1249 } | 1250 } |
1250 | 1251 |
1251 | 1252 |
1252 // SearchTermCacheItem --------------------------------------------------------- | 1253 // SearchTermCacheItem --------------------------------------------------------- |
1253 | 1254 |
1254 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem( | 1255 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem( |
1255 const WordIDSet& word_id_set, | 1256 const WordIDSet& word_id_set, |
1256 const HistoryIDSet& history_id_set) | 1257 const HistoryIDSet& history_id_set) |
1257 : word_id_set_(word_id_set), | 1258 : word_id_set_(word_id_set), history_id_set_(history_id_set), used_(true) { |
1258 history_id_set_(history_id_set), | 1259 } |
1259 used_(true) {} | |
1260 | 1260 |
1261 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem() | 1261 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem() : used_(true) { |
1262 : used_(true) {} | 1262 } |
1263 | 1263 |
1264 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {} | 1264 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() { |
1265 | 1265 } |
1266 | 1266 |
1267 // URLIndexPrivateData::AddHistoryMatch ---------------------------------------- | 1267 // URLIndexPrivateData::AddHistoryMatch ---------------------------------------- |
1268 | 1268 |
1269 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch( | 1269 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch( |
| 1270 bookmarks::BookmarkModel* bookmark_model, |
1270 const URLIndexPrivateData& private_data, | 1271 const URLIndexPrivateData& private_data, |
1271 const std::string& languages, | 1272 const std::string& languages, |
1272 const base::string16& lower_string, | 1273 const base::string16& lower_string, |
1273 const String16Vector& lower_terms, | 1274 const String16Vector& lower_terms, |
1274 const base::Time now, | 1275 const base::Time now) |
1275 const ScoredHistoryMatch::Builder& builder) | 1276 : bookmark_model_(bookmark_model), |
1276 : private_data_(private_data), | 1277 private_data_(private_data), |
1277 languages_(languages), | 1278 languages_(languages), |
1278 builder_(builder), | |
1279 lower_string_(lower_string), | 1279 lower_string_(lower_string), |
1280 lower_terms_(lower_terms), | 1280 lower_terms_(lower_terms), |
1281 now_(now) { | 1281 now_(now) { |
1282 // Calculate offsets for each term. For instance, the offset for | 1282 // Calculate offsets for each term. For instance, the offset for |
1283 // ".net" should be 1, indicating that the actual word-part of the term | 1283 // ".net" should be 1, indicating that the actual word-part of the term |
1284 // starts at offset 1. | 1284 // starts at offset 1. |
1285 lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u); | 1285 lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u); |
1286 for (size_t i = 0; i < lower_terms_.size(); ++i) { | 1286 for (size_t i = 0; i < lower_terms_.size(); ++i) { |
1287 base::i18n::BreakIterator iter(lower_terms_[i], | 1287 base::i18n::BreakIterator iter(lower_terms_[i], |
1288 base::i18n::BreakIterator::BREAK_WORD); | 1288 base::i18n::BreakIterator::BREAK_WORD); |
1289 // If the iterator doesn't work, assume an offset of 0. | 1289 // If the iterator doesn't work, assume an offset of 0. |
1290 if (!iter.Init()) | 1290 if (!iter.Init()) |
1291 continue; | 1291 continue; |
1292 // Find the first word start. | 1292 // Find the first word start. |
1293 while (iter.Advance() && !iter.IsWord()) {} | 1293 while (iter.Advance() && !iter.IsWord()) {} |
1294 if (iter.IsWord()) | 1294 if (iter.IsWord()) |
1295 lower_terms_to_word_starts_offsets_[i] = iter.prev(); | 1295 lower_terms_to_word_starts_offsets_[i] = iter.prev(); |
1296 // Else: the iterator didn't find a word break. Assume an offset of 0. | 1296 // Else: the iterator didn't find a word break. Assume an offset of 0. |
1297 } | 1297 } |
1298 } | 1298 } |
1299 | 1299 |
1300 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {} | 1300 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() { |
| 1301 } |
1301 | 1302 |
1302 void URLIndexPrivateData::AddHistoryMatch::operator()( | 1303 void URLIndexPrivateData::AddHistoryMatch::operator()( |
1303 const HistoryID history_id) { | 1304 const HistoryID history_id) { |
1304 HistoryInfoMap::const_iterator hist_pos = | 1305 HistoryInfoMap::const_iterator hist_pos = |
1305 private_data_.history_info_map_.find(history_id); | 1306 private_data_.history_info_map_.find(history_id); |
1306 if (hist_pos != private_data_.history_info_map_.end()) { | 1307 if (hist_pos != private_data_.history_info_map_.end()) { |
1307 const history::URLRow& hist_item = hist_pos->second.url_row; | 1308 const history::URLRow& hist_item = hist_pos->second.url_row; |
1308 const VisitInfoVector& visits = hist_pos->second.visits; | 1309 const VisitInfoVector& visits = hist_pos->second.visits; |
1309 WordStartsMap::const_iterator starts_pos = | 1310 WordStartsMap::const_iterator starts_pos = |
1310 private_data_.word_starts_map_.find(history_id); | 1311 private_data_.word_starts_map_.find(history_id); |
1311 DCHECK(starts_pos != private_data_.word_starts_map_.end()); | 1312 DCHECK(starts_pos != private_data_.word_starts_map_.end()); |
1312 ScoredHistoryMatch match = builder_.Build( | 1313 ScoredHistoryMatch match( |
1313 hist_item, visits, languages_, lower_string_, lower_terms_, | 1314 hist_item, visits, languages_, lower_string_, lower_terms_, |
1314 lower_terms_to_word_starts_offsets_, starts_pos->second, now_); | 1315 lower_terms_to_word_starts_offsets_, starts_pos->second, |
| 1316 bookmark_model_ && bookmark_model_->IsBookmarked(hist_item.url()), |
| 1317 now_); |
1315 if (match.raw_score > 0) | 1318 if (match.raw_score > 0) |
1316 scored_matches_.push_back(match); | 1319 scored_matches_.push_back(match); |
1317 } | 1320 } |
1318 } | 1321 } |
1319 | 1322 |
1320 | 1323 |
1321 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- | 1324 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- |
1322 | 1325 |
1323 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater( | 1326 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater( |
1324 const HistoryInfoMap& history_info_map) | 1327 const HistoryInfoMap& history_info_map) |
1325 : history_info_map_(history_info_map) { | 1328 : history_info_map_(history_info_map) { |
1326 } | 1329 } |
1327 | 1330 |
1328 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {} | 1331 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() { |
| 1332 } |
1329 | 1333 |
1330 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()( | 1334 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()( |
1331 const HistoryID h1, | 1335 const HistoryID h1, |
1332 const HistoryID h2) { | 1336 const HistoryID h2) { |
1333 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1)); | 1337 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1)); |
1334 if (entry1 == history_info_map_.end()) | 1338 if (entry1 == history_info_map_.end()) |
1335 return false; | 1339 return false; |
1336 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2)); | 1340 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2)); |
1337 if (entry2 == history_info_map_.end()) | 1341 if (entry2 == history_info_map_.end()) |
1338 return true; | 1342 return true; |
1339 const history::URLRow& r1(entry1->second.url_row); | 1343 const history::URLRow& r1(entry1->second.url_row); |
1340 const history::URLRow& r2(entry2->second.url_row); | 1344 const history::URLRow& r2(entry2->second.url_row); |
1341 // First cut: typed count, visit count, recency. | 1345 // First cut: typed count, visit count, recency. |
1342 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks | 1346 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks |
1343 // recently visited (within the last 12/24 hours) as highly important. Get | 1347 // recently visited (within the last 12/24 hours) as highly important. Get |
1344 // input from mpearson. | 1348 // input from mpearson. |
1345 if (r1.typed_count() != r2.typed_count()) | 1349 if (r1.typed_count() != r2.typed_count()) |
1346 return (r1.typed_count() > r2.typed_count()); | 1350 return (r1.typed_count() > r2.typed_count()); |
1347 if (r1.visit_count() != r2.visit_count()) | 1351 if (r1.visit_count() != r2.visit_count()) |
1348 return (r1.visit_count() > r2.visit_count()); | 1352 return (r1.visit_count() > r2.visit_count()); |
1349 return (r1.last_visit() > r2.last_visit()); | 1353 return (r1.last_visit() > r2.last_visit()); |
1350 } | 1354 } |
OLD | NEW |