OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/bookmarks/bookmark_index.h" | 5 #include "chrome/browser/bookmarks/bookmark_index.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
9 #include <list> | 9 #include <list> |
10 | 10 |
11 #include "base/i18n/case_conversion.h" | 11 #include "base/i18n/case_conversion.h" |
12 #include "base/strings/string16.h" | 12 #include "base/strings/string16.h" |
| 13 #include "chrome/browser/bookmarks/bookmark_match.h" |
13 #include "chrome/browser/bookmarks/bookmark_model.h" | 14 #include "chrome/browser/bookmarks/bookmark_model.h" |
14 #include "chrome/browser/bookmarks/bookmark_title_match.h" | 15 #include "chrome/browser/bookmarks/bookmark_utils.h" |
15 #include "chrome/browser/history/history_service.h" | 16 #include "chrome/browser/history/history_service.h" |
16 #include "chrome/browser/history/history_service_factory.h" | 17 #include "chrome/browser/history/history_service_factory.h" |
17 #include "chrome/browser/history/query_parser.h" | 18 #include "chrome/browser/history/query_parser.h" |
18 #include "chrome/browser/history/url_database.h" | 19 #include "chrome/browser/history/url_database.h" |
| 20 #include "chrome/browser/omnibox/omnibox_field_trial.h" |
19 #include "third_party/icu/source/common/unicode/normalizer2.h" | 21 #include "third_party/icu/source/common/unicode/normalizer2.h" |
20 | 22 |
21 namespace { | 23 namespace { |
22 | 24 |
23 // Returns a normalized version of the UTF16 string |text|. If it fails to | 25 // Returns a normalized version of the UTF16 string |text|. If it fails to |
24 // normalize the string, returns |text| itself as a best-effort. | 26 // normalize the string, returns |text| itself as a best-effort. |
25 base::string16 Normalize(const base::string16& text) { | 27 base::string16 Normalize(const base::string16& text) { |
26 UErrorCode status = U_ZERO_ERROR; | 28 UErrorCode status = U_ZERO_ERROR; |
27 const icu::Normalizer2* normalizer2 = | 29 const icu::Normalizer2* normalizer2 = |
28 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status); | 30 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status); |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
64 | 66 |
65 BookmarkIndex::NodeSet::const_iterator | 67 BookmarkIndex::NodeSet::const_iterator |
66 BookmarkIndex::Match::nodes_begin() const { | 68 BookmarkIndex::Match::nodes_begin() const { |
67 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); | 69 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); |
68 } | 70 } |
69 | 71 |
70 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { | 72 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { |
71 return nodes.empty() ? terms.front()->second.end() : nodes.end(); | 73 return nodes.empty() ? terms.front()->second.end() : nodes.end(); |
72 } | 74 } |
73 | 75 |
74 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context) | 76 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context, |
75 : browser_context_(browser_context) { | 77 const std::string& languages) |
| 78 : index_urls_(OmniboxFieldTrial::BookmarksIndexURLsValue()), |
| 79 browser_context_(browser_context), |
| 80 languages_(languages) { |
76 } | 81 } |
77 | 82 |
78 BookmarkIndex::~BookmarkIndex() { | 83 BookmarkIndex::~BookmarkIndex() { |
79 } | 84 } |
80 | 85 |
81 void BookmarkIndex::Add(const BookmarkNode* node) { | 86 void BookmarkIndex::Add(const BookmarkNode* node) { |
82 if (!node->is_url()) | 87 if (!node->is_url()) |
83 return; | 88 return; |
84 std::vector<base::string16> terms = | 89 std::vector<base::string16> terms = |
85 ExtractQueryWords(Normalize(node->GetTitle())); | 90 ExtractQueryWords(Normalize(node->GetTitle())); |
86 for (size_t i = 0; i < terms.size(); ++i) | 91 for (size_t i = 0; i < terms.size(); ++i) |
87 RegisterNode(terms[i], node); | 92 RegisterNode(terms[i], node); |
| 93 if (index_urls_) { |
| 94 terms = ExtractQueryWords( |
| 95 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_)); |
| 96 for (size_t i = 0; i < terms.size(); ++i) |
| 97 RegisterNode(terms[i], node); |
| 98 } |
88 } | 99 } |
89 | 100 |
90 void BookmarkIndex::Remove(const BookmarkNode* node) { | 101 void BookmarkIndex::Remove(const BookmarkNode* node) { |
91 if (!node->is_url()) | 102 if (!node->is_url()) |
92 return; | 103 return; |
93 | 104 |
94 std::vector<base::string16> terms = | 105 std::vector<base::string16> terms = |
95 ExtractQueryWords(Normalize(node->GetTitle())); | 106 ExtractQueryWords(Normalize(node->GetTitle())); |
96 for (size_t i = 0; i < terms.size(); ++i) | 107 for (size_t i = 0; i < terms.size(); ++i) |
97 UnregisterNode(terms[i], node); | 108 UnregisterNode(terms[i], node); |
| 109 if (index_urls_) { |
| 110 terms = ExtractQueryWords( |
| 111 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_)); |
| 112 for (size_t i = 0; i < terms.size(); ++i) |
| 113 UnregisterNode(terms[i], node); |
| 114 } |
98 } | 115 } |
99 | 116 |
100 void BookmarkIndex::GetBookmarksWithTitlesMatching( | 117 void BookmarkIndex::GetBookmarksMatching(const base::string16& input_query, |
101 const base::string16& input_query, | 118 size_t max_count, |
102 size_t max_count, | 119 std::vector<BookmarkMatch>* results) { |
103 std::vector<BookmarkTitleMatch>* results) { | |
104 const base::string16 query = Normalize(input_query); | 120 const base::string16 query = Normalize(input_query); |
105 std::vector<base::string16> terms = ExtractQueryWords(query); | 121 std::vector<base::string16> terms = ExtractQueryWords(query); |
106 if (terms.empty()) | 122 if (terms.empty()) |
107 return; | 123 return; |
108 | 124 |
109 Matches matches; | 125 Matches matches; |
110 for (size_t i = 0; i < terms.size(); ++i) { | 126 for (size_t i = 0; i < terms.size(); ++i) { |
111 if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches)) | 127 if (!GetBookmarksMatchingTerm(terms[i], i == 0, &matches)) |
112 return; | 128 return; |
113 } | 129 } |
114 | 130 |
115 NodeTypedCountPairs node_typed_counts; | 131 NodeTypedCountPairs node_typed_counts; |
116 SortMatches(matches, &node_typed_counts); | 132 SortMatches(matches, &node_typed_counts); |
117 | 133 |
118 // We use a QueryParser to fill in match positions for us. It's not the most | 134 // We use a QueryParser to fill in match positions for us. It's not the most |
119 // efficient way to go about this, but by the time we get here we know what | 135 // efficient way to go about this, but by the time we get here we know what |
120 // matches and so this shouldn't be performance critical. | 136 // matches and so this shouldn't be performance critical. |
121 QueryParser parser; | 137 QueryParser parser; |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
171 | 187 |
172 NodeTypedCountPair pair(*i, typed_count); | 188 NodeTypedCountPair pair(*i, typed_count); |
173 node_typed_counts->push_back(pair); | 189 node_typed_counts->push_back(pair); |
174 } | 190 } |
175 } | 191 } |
176 | 192 |
177 void BookmarkIndex::AddMatchToResults( | 193 void BookmarkIndex::AddMatchToResults( |
178 const BookmarkNode* node, | 194 const BookmarkNode* node, |
179 QueryParser* parser, | 195 QueryParser* parser, |
180 const std::vector<QueryNode*>& query_nodes, | 196 const std::vector<QueryNode*>& query_nodes, |
181 std::vector<BookmarkTitleMatch>* results) { | 197 std::vector<BookmarkMatch>* results) { |
182 BookmarkTitleMatch title_match; | |
183 // Check that the result matches the query. The previous search | 198 // Check that the result matches the query. The previous search |
184 // was a simple per-word search, while the more complex matching | 199 // was a simple per-word search, while the more complex matching |
185 // of QueryParser may filter it out. For example, the query | 200 // of QueryParser may filter it out. For example, the query |
186 // ["thi"] will match the bookmark titled [Thinking], but since | 201 // ["thi"] will match the bookmark titled [Thinking], but since |
187 // ["thi"] is quoted we don't want to do a prefix match. | 202 // ["thi"] is quoted we don't want to do a prefix match. |
188 if (parser->DoesQueryMatch(Normalize(node->GetTitle()), query_nodes, | 203 std::vector<QueryWord> title_words, url_words; |
189 &(title_match.match_positions))) { | 204 const base::string16 lower_title = |
190 title_match.node = node; | 205 base::i18n::ToLower(Normalize(node->GetTitle())); |
191 results->push_back(title_match); | 206 parser->ExtractQueryWords(lower_title, &title_words); |
| 207 if (index_urls_) { |
| 208 parser->ExtractQueryWords( |
| 209 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_), |
| 210 &url_words); |
192 } | 211 } |
| 212 Snippet::MatchPositions title_matches, url_matches; |
| 213 for (size_t i = 0; i < query_nodes.size(); ++i) { |
| 214 const bool has_title_matches = |
| 215 query_nodes[i]->HasMatchIn(title_words, &title_matches); |
| 216 const bool has_url_matches = index_urls_ && |
| 217 query_nodes[i]->HasMatchIn(url_words, &url_matches); |
| 218 if (!has_title_matches && !has_url_matches) |
| 219 return; |
| 220 QueryParser::CoalesceAndSortMatchPositions(&title_matches); |
| 221 if (index_urls_) |
| 222 QueryParser::CoalesceAndSortMatchPositions(&url_matches); |
| 223 } |
| 224 BookmarkMatch match; |
| 225 if (lower_title.length() == node->GetTitle().length()) { |
| 226 // Only use title matches if the lowercase string is the same length |
| 227 // as the original string, otherwise the matches are meaningless. |
| 228 // TODO(mpearson): revise match positions appropriately. |
| 229 match.title_match_positions.swap(title_matches); |
| 230 } |
| 231 if (index_urls_) |
| 232 match.url_match_positions.swap(url_matches); |
| 233 match.node = node; |
| 234 results->push_back(match); |
193 } | 235 } |
194 | 236 |
195 bool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const base::string16& term
, | 237 bool BookmarkIndex::GetBookmarksMatchingTerm(const base::string16& term, |
196 bool first_term, | 238 bool first_term, |
197 Matches* matches) { | 239 Matches* matches) { |
198 Index::const_iterator i = index_.lower_bound(term); | 240 Index::const_iterator i = index_.lower_bound(term); |
199 if (i == index_.end()) | 241 if (i == index_.end()) |
200 return false; | 242 return false; |
201 | 243 |
202 if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) { | 244 if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) { |
203 // Term is too short for prefix match, compare using exact match. | 245 // Term is too short for prefix match, compare using exact match. |
204 if (i->first != term) | 246 if (i->first != term) |
205 return false; // No bookmarks with this term. | 247 return false; // No bookmarks with this term. |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
294 Index::iterator i = index_.find(term); | 336 Index::iterator i = index_.find(term); |
295 if (i == index_.end()) { | 337 if (i == index_.end()) { |
296 // We can get here if the node has the same term more than once. For | 338 // We can get here if the node has the same term more than once. For |
297 // example, a bookmark with the title 'foo foo' would end up here. | 339 // example, a bookmark with the title 'foo foo' would end up here. |
298 return; | 340 return; |
299 } | 341 } |
300 i->second.erase(node); | 342 i->second.erase(node); |
301 if (i->second.empty()) | 343 if (i->second.empty()) |
302 index_.erase(i); | 344 index_.erase(i); |
303 } | 345 } |
OLD | NEW |