OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/bookmarks/bookmark_index.h" | 5 #include "chrome/browser/bookmarks/bookmark_index.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
9 #include <list> | 9 #include <list> |
10 | 10 |
11 #include "base/i18n/case_conversion.h" | 11 #include "base/i18n/case_conversion.h" |
12 #include "base/strings/string16.h" | 12 #include "base/strings/string16.h" |
| 13 #include "chrome/browser/bookmarks/bookmark_match.h" |
13 #include "chrome/browser/bookmarks/bookmark_model.h" | 14 #include "chrome/browser/bookmarks/bookmark_model.h" |
14 #include "chrome/browser/bookmarks/bookmark_title_match.h" | 15 #include "chrome/browser/bookmarks/bookmark_utils.h" |
15 #include "chrome/browser/history/history_service.h" | 16 #include "chrome/browser/history/history_service.h" |
16 #include "chrome/browser/history/history_service_factory.h" | 17 #include "chrome/browser/history/history_service_factory.h" |
17 #include "chrome/browser/history/query_parser.h" | 18 #include "chrome/browser/history/query_parser.h" |
18 #include "chrome/browser/history/url_database.h" | 19 #include "chrome/browser/history/url_database.h" |
19 | 20 |
20 // Used when finding the set of bookmarks that match a query. Each match | 21 // Used when finding the set of bookmarks that match a query. Each match |
21 // represents a set of terms (as an interator into the Index) matching the | 22 // represents a set of terms (as an interator into the Index) matching the |
22 // query as well as the set of nodes that contain those terms in their titles. | 23 // query as well as the set of nodes that contain those terms in their titles. |
23 struct BookmarkIndex::Match { | 24 struct BookmarkIndex::Match { |
24 // List of terms matching the query. | 25 // List of terms matching the query. |
(...skipping 19 matching lines...) Expand all Loading... |
44 | 45 |
45 BookmarkIndex::NodeSet::const_iterator | 46 BookmarkIndex::NodeSet::const_iterator |
46 BookmarkIndex::Match::nodes_begin() const { | 47 BookmarkIndex::Match::nodes_begin() const { |
47 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); | 48 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); |
48 } | 49 } |
49 | 50 |
50 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { | 51 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { |
51 return nodes.empty() ? terms.front()->second.end() : nodes.end(); | 52 return nodes.empty() ? terms.front()->second.end() : nodes.end(); |
52 } | 53 } |
53 | 54 |
54 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context) | 55 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context, |
55 : browser_context_(browser_context) { | 56 const std::string& languages) |
| 57 : browser_context_(browser_context), |
| 58 languages_(languages) { |
56 } | 59 } |
57 | 60 |
58 BookmarkIndex::~BookmarkIndex() { | 61 BookmarkIndex::~BookmarkIndex() { |
59 } | 62 } |
60 | 63 |
61 void BookmarkIndex::Add(const BookmarkNode* node) { | 64 void BookmarkIndex::Add(const BookmarkNode* node) { |
62 if (!node->is_url()) | 65 if (!node->is_url()) |
63 return; | 66 return; |
64 std::vector<base::string16> terms = ExtractQueryWords(node->GetTitle()); | 67 std::vector<base::string16> terms = ExtractQueryWords(node->GetTitle()); |
65 for (size_t i = 0; i < terms.size(); ++i) | 68 for (size_t i = 0; i < terms.size(); ++i) |
66 RegisterNode(terms[i], node); | 69 RegisterNode(terms[i], node); |
| 70 terms = ExtractQueryWords(bookmark_utils::CleanUpUrlForMatching(node->url(), |
| 71 languages_)); |
| 72 for (size_t i = 0; i < terms.size(); ++i) |
| 73 RegisterNode(terms[i], node); |
67 } | 74 } |
68 | 75 |
69 void BookmarkIndex::Remove(const BookmarkNode* node) { | 76 void BookmarkIndex::Remove(const BookmarkNode* node) { |
70 if (!node->is_url()) | 77 if (!node->is_url()) |
71 return; | 78 return; |
72 | 79 |
73 std::vector<base::string16> terms = ExtractQueryWords(node->GetTitle()); | 80 std::vector<base::string16> terms = ExtractQueryWords(node->GetTitle()); |
74 for (size_t i = 0; i < terms.size(); ++i) | 81 for (size_t i = 0; i < terms.size(); ++i) |
75 UnregisterNode(terms[i], node); | 82 UnregisterNode(terms[i], node); |
| 83 terms = ExtractQueryWords(bookmark_utils::CleanUpUrlForMatching(node->url(), |
| 84 languages_)); |
| 85 for (size_t i = 0; i < terms.size(); ++i) |
| 86 UnregisterNode(terms[i], node); |
76 } | 87 } |
77 | 88 |
78 void BookmarkIndex::GetBookmarksWithTitlesMatching( | 89 void BookmarkIndex::GetBookmarksMatching(const base::string16& query, |
79 const base::string16& query, | 90 size_t max_count, |
80 size_t max_count, | 91 std::vector<BookmarkMatch>* results) { |
81 std::vector<BookmarkTitleMatch>* results) { | |
82 std::vector<base::string16> terms = ExtractQueryWords(query); | 92 std::vector<base::string16> terms = ExtractQueryWords(query); |
83 if (terms.empty()) | 93 if (terms.empty()) |
84 return; | 94 return; |
85 | 95 |
86 Matches matches; | 96 Matches matches; |
87 for (size_t i = 0; i < terms.size(); ++i) { | 97 for (size_t i = 0; i < terms.size(); ++i) { |
88 if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches)) | 98 if (!GetBookmarksMatchingTerm(terms[i], i == 0, &matches)) |
89 return; | 99 return; |
90 } | 100 } |
91 | 101 |
92 NodeTypedCountPairs node_typed_counts; | 102 NodeTypedCountPairs node_typed_counts; |
93 SortMatches(matches, &node_typed_counts); | 103 SortMatches(matches, &node_typed_counts); |
94 | 104 |
95 // We use a QueryParser to fill in match positions for us. It's not the most | 105 // We use a QueryParser to fill in match positions for us. It's not the most |
96 // efficient way to go about this, but by the time we get here we know what | 106 // efficient way to go about this, but by the time we get here we know what |
97 // matches and so this shouldn't be performance critical. | 107 // matches and so this shouldn't be performance critical. |
98 QueryParser parser; | 108 QueryParser parser; |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
142 url_db->GetRowForURL((*i)->url(), &url); | 152 url_db->GetRowForURL((*i)->url(), &url); |
143 NodeTypedCountPair pair(*i, url.typed_count()); | 153 NodeTypedCountPair pair(*i, url.typed_count()); |
144 node_typed_counts->push_back(pair); | 154 node_typed_counts->push_back(pair); |
145 } | 155 } |
146 } | 156 } |
147 | 157 |
148 void BookmarkIndex::AddMatchToResults( | 158 void BookmarkIndex::AddMatchToResults( |
149 const BookmarkNode* node, | 159 const BookmarkNode* node, |
150 QueryParser* parser, | 160 QueryParser* parser, |
151 const std::vector<QueryNode*>& query_nodes, | 161 const std::vector<QueryNode*>& query_nodes, |
152 std::vector<BookmarkTitleMatch>* results) { | 162 std::vector<BookmarkMatch>* results) { |
153 BookmarkTitleMatch title_match; | 163 BookmarkMatch match; |
154 // Check that the result matches the query. The previous search | 164 // Check that the result matches the query. The previous search |
155 // was a simple per-word search, while the more complex matching | 165 // was a simple per-word search, while the more complex matching |
156 // of QueryParser may filter it out. For example, the query | 166 // of QueryParser may filter it out. For example, the query |
157 // ["thi"] will match the bookmark titled [Thinking], but since | 167 // ["thi"] will match the bookmark titled [Thinking], but since |
158 // ["thi"] is quoted we don't want to do a prefix match. | 168 // ["thi"] is quoted we don't want to do a prefix match. |
159 if (parser->DoesQueryMatch(node->GetTitle(), query_nodes, | 169 std::vector<QueryWord> title_words, url_words; |
160 &(title_match.match_positions))) { | 170 parser->ExtractQueryWords( |
161 title_match.node = node; | 171 base::i18n::ToLower(node->GetTitle()), &title_words); |
162 results->push_back(title_match); | 172 parser->ExtractQueryWords( |
| 173 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_), |
| 174 &url_words); |
| 175 Snippet::MatchPositions title_matches, url_matches; |
| 176 for (size_t i = 0; i < query_nodes.size(); ++i) { |
| 177 const bool has_title_matches = |
| 178 query_nodes[i]->HasMatchIn(title_words, &title_matches); |
| 179 const bool has_url_matches = |
| 180 query_nodes[i]->HasMatchIn(url_words, &url_matches); |
| 181 if (!has_title_matches && !has_url_matches) |
| 182 return; |
| 183 QueryParser::CoalseAndSortMatchPositions(&title_matches); |
| 184 QueryParser::CoalseAndSortMatchPositions(&url_matches); |
163 } | 185 } |
| 186 match.title_match_positions.swap(title_matches); |
| 187 match.url_match_positions.swap(url_matches); |
| 188 match.node = node; |
| 189 results->push_back(match); |
164 } | 190 } |
165 | 191 |
166 bool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const base::string16& term
, | 192 bool BookmarkIndex::GetBookmarksMatchingTerm(const base::string16& term, |
167 bool first_term, | 193 bool first_term, |
168 Matches* matches) { | 194 Matches* matches) { |
169 Index::const_iterator i = index_.lower_bound(term); | 195 Index::const_iterator i = index_.lower_bound(term); |
170 if (i == index_.end()) | 196 if (i == index_.end()) |
171 return false; | 197 return false; |
172 | 198 |
173 if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) { | 199 if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) { |
174 // Term is too short for prefix match, compare using exact match. | 200 // Term is too short for prefix match, compare using exact match. |
175 if (i->first != term) | 201 if (i->first != term) |
176 return false; // No bookmarks with this term. | 202 return false; // No bookmarks with this term. |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
267 Index::iterator i = index_.find(term); | 293 Index::iterator i = index_.find(term); |
268 if (i == index_.end()) { | 294 if (i == index_.end()) { |
269 // We can get here if the node has the same term more than once. For | 295 // We can get here if the node has the same term more than once. For |
270 // example, a bookmark with the title 'foo foo' would end up here. | 296 // example, a bookmark with the title 'foo foo' would end up here. |
271 return; | 297 return; |
272 } | 298 } |
273 i->second.erase(node); | 299 i->second.erase(node); |
274 if (i->second.empty()) | 300 if (i->second.empty()) |
275 index_.erase(i); | 301 index_.erase(i); |
276 } | 302 } |
OLD | NEW |