// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/bookmarks/core/browser/bookmark_index.h"

#include <algorithm>
#include <functional>
#include <iterator>
#include <list>

#include "base/i18n/case_conversion.h"
#include "base/logging.h"
#include "base/strings/string16.h"
#include "base/strings/utf_offset_string_conversions.h"
#include "components/bookmarks/core/browser/bookmark_client.h"
#include "components/bookmarks/core/browser/bookmark_match.h"
#include "components/bookmarks/core/browser/bookmark_node.h"
#include "components/bookmarks/core/browser/bookmark_utils.h"
#include "components/query_parser/query_parser.h"
#include "components/query_parser/snippet.h"
#include "third_party/icu/source/common/unicode/normalizer2.h"

typedef BookmarkClient::NodeTypedCountPair NodeTypedCountPair;
typedef BookmarkClient::NodeTypedCountPairs NodeTypedCountPairs;

namespace {

// Returns a normalized version of the UTF16 string |text|. If it fails to
// normalize the string, returns |text| itself as a best-effort.
base::string16 Normalize(const base::string16& text) {
  UErrorCode status = U_ZERO_ERROR;
  const icu::Normalizer2* normalizer2 =
      icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status);
  icu::UnicodeString unicode_text(
      text.data(), static_cast<int32_t>(text.length()));
  icu::UnicodeString unicode_normalized_text;
  normalizer2->normalize(unicode_text, unicode_normalized_text, status);
  if (U_FAILURE(status))
    return text;
  return base::string16(unicode_normalized_text.getBuffer(),
                        unicode_normalized_text.length());
}
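
// For reference: NFKC composition folds visually equivalent sequences into a
// single form. For example, "e" followed by the combining acute accent U+0301
// becomes the precomposed U+00E9, and full-width compatibility characters map
// to their ASCII counterparts, so such variants in titles and queries index
// and match as the same terms.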

// Sort functor for NodeTypedCountPairs. We sort in decreasing order of typed
// count so that the best matches will always be added to the results.
struct NodeTypedCountPairSortFunctor
    : std::binary_function<NodeTypedCountPair, NodeTypedCountPair, bool> {
  bool operator()(const NodeTypedCountPair& a,
                  const NodeTypedCountPair& b) const {
    return a.second > b.second;
  }
};

// Extract the const Node* stored in a BookmarkClient::NodeTypedCountPair.
struct NodeTypedCountPairExtractNodeFunctor
    : std::unary_function<NodeTypedCountPair, const BookmarkNode*> {
  const BookmarkNode* operator()(const NodeTypedCountPair& pair) const {
    return pair.first;
  }
};

}  // namespace

// Used when finding the set of bookmarks that match a query. Each match
// represents a set of terms (as an iterator into the Index) matching the
// query as well as the set of nodes that contain those terms in their titles.
struct BookmarkIndex::Match {
  // List of terms matching the query.
  std::list<Index::const_iterator> terms;

  // The set of nodes matching the terms. As an optimization this is empty
  // when we match only one term, and is filled in when we get more than one
  // term. We can do this as when we have only one matching term we know
  // the set of matching nodes is terms.front()->second.
  //
  // Use nodes_begin() and nodes_end() to get an iterator over the set as
  // it handles the necessary switching between nodes and terms.front().
  NodeSet nodes;

  // Returns an iterator to the beginning of the matching nodes. See
  // description of nodes for why this should be used over nodes.begin().
  NodeSet::const_iterator nodes_begin() const;

  // Returns an iterator to the end of the matching nodes. See description
  // of nodes for why this should be used over nodes.end().
  NodeSet::const_iterator nodes_end() const;
};

BookmarkIndex::NodeSet::const_iterator
    BookmarkIndex::Match::nodes_begin() const {
  return nodes.empty() ? terms.front()->second.begin() : nodes.begin();
}

BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const {
  return nodes.empty() ? terms.front()->second.end() : nodes.end();
}

BookmarkIndex::BookmarkIndex(BookmarkClient* client,
                             bool index_urls,
                             const std::string& languages)
    : client_(client),
      languages_(languages),
      index_urls_(index_urls) {
  DCHECK(client_);
}

BookmarkIndex::~BookmarkIndex() {
}

void BookmarkIndex::Add(const BookmarkNode* node) {
  if (!node->is_url())
    return;
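  // A title such as "Chromium Bug Tracker", for example, is registered under
  // the lowercased terms "chromium", "bug" and "tracker"; when URLs are
  // indexed, the URL is first cleaned up via
  // bookmark_utils::CleanUpUrlForMatching() and then split the same way.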
  std::vector<base::string16> terms =
      ExtractQueryWords(Normalize(node->GetTitle()));
  for (size_t i = 0; i < terms.size(); ++i)
    RegisterNode(terms[i], node);
  if (index_urls_) {
    terms = ExtractQueryWords(bookmark_utils::CleanUpUrlForMatching(
        node->url(), languages_, NULL));
    for (size_t i = 0; i < terms.size(); ++i)
      RegisterNode(terms[i], node);
  }
}

void BookmarkIndex::Remove(const BookmarkNode* node) {
  if (!node->is_url())
    return;

  std::vector<base::string16> terms =
      ExtractQueryWords(Normalize(node->GetTitle()));
  for (size_t i = 0; i < terms.size(); ++i)
    UnregisterNode(terms[i], node);
  if (index_urls_) {
    terms = ExtractQueryWords(bookmark_utils::CleanUpUrlForMatching(
        node->url(), languages_, NULL));
    for (size_t i = 0; i < terms.size(); ++i)
      UnregisterNode(terms[i], node);
  }
}

void BookmarkIndex::GetBookmarksMatching(const base::string16& input_query,
                                         size_t max_count,
                                         std::vector<BookmarkMatch>* results) {
  const base::string16 query = Normalize(input_query);
  std::vector<base::string16> terms = ExtractQueryWords(query);
  if (terms.empty())
    return;

  Matches matches;
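  // Each successive term narrows |matches|. For a query such as "goo ma"
  // (a hypothetical example), the first term expands the prefix "goo" into
  // every index entry starting with it, and the second pass keeps only those
  // matches whose nodes also contain a term starting with "ma".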
  for (size_t i = 0; i < terms.size(); ++i) {
    if (!GetBookmarksMatchingTerm(terms[i], i == 0, &matches))
      return;
  }

  Nodes sorted_nodes;
  SortMatches(matches, &sorted_nodes);

  // We use a QueryParser to fill in match positions for us. It's not the most
  // efficient way to go about this, but by the time we get here we know what
  // matches and so this shouldn't be performance critical.
  query_parser::QueryParser parser;
  ScopedVector<query_parser::QueryNode> query_nodes;
  parser.ParseQueryNodes(query, &query_nodes.get());

  // The highest typed counts should be at the beginning of the results vector
  // so that the best matches will always be included in the results. The loop
  // that calculates result relevance in HistoryContentsProvider::ConvertResults
  // runs backwards to ensure that higher relevance is attributed to the best
  // matches.
  for (Nodes::const_iterator i = sorted_nodes.begin();
       i != sorted_nodes.end() && results->size() < max_count;
       ++i)
    AddMatchToResults(*i, &parser, query_nodes.get(), results);
}

void BookmarkIndex::SortMatches(const Matches& matches,
                                Nodes* sorted_nodes) const {
  NodeSet nodes;
  for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) {
#if !defined(OS_ANDROID)
    nodes.insert(i->nodes_begin(), i->nodes_end());
#else
    // Work around a bug in the implementation of std::set::insert in the STL
    // used on Android (http://crbug.com/367050).
    for (NodeSet::const_iterator n = i->nodes_begin(); n != i->nodes_end(); ++n)
      nodes.insert(nodes.end(), *n);
#endif
  }
  sorted_nodes->reserve(sorted_nodes->size() + nodes.size());
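  // When the client can supply typed counts (how often the user has typed
  // each bookmarked URL), the nodes are ordered by that signal so the
  // most-typed bookmarks come first; otherwise they are appended in NodeSet
  // iteration order.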
  if (client_->SupportsTypedCountForNodes()) {
    NodeTypedCountPairs node_typed_counts;
    client_->GetTypedCountForNodes(nodes, &node_typed_counts);
    std::sort(node_typed_counts.begin(),
              node_typed_counts.end(),
              NodeTypedCountPairSortFunctor());
    std::transform(node_typed_counts.begin(),
                   node_typed_counts.end(),
                   std::back_inserter(*sorted_nodes),
                   NodeTypedCountPairExtractNodeFunctor());
  } else {
    sorted_nodes->insert(sorted_nodes->end(), nodes.begin(), nodes.end());
  }
}

void BookmarkIndex::AddMatchToResults(
    const BookmarkNode* node,
    query_parser::QueryParser* parser,
    const query_parser::QueryNodeStarVector& query_nodes,
    std::vector<BookmarkMatch>* results) {
  // Check that the result matches the query. The previous search
  // was a simple per-word search, while the more complex matching
  // of QueryParser may filter it out. For example, the query
  // ["thi"] will match the bookmark titled [Thinking], but since
  // ["thi"] is quoted we don't want to do a prefix match.
  query_parser::QueryWordVector title_words, url_words;
  const base::string16 lower_title =
      base::i18n::ToLower(Normalize(node->GetTitle()));
  parser->ExtractQueryWords(lower_title, &title_words);
  base::OffsetAdjuster::Adjustments adjustments;
  if (index_urls_) {
    parser->ExtractQueryWords(bookmark_utils::CleanUpUrlForMatching(
        node->url(), languages_, &adjustments), &url_words);
  }
  query_parser::Snippet::MatchPositions title_matches, url_matches;
  for (size_t i = 0; i < query_nodes.size(); ++i) {
    const bool has_title_matches =
        query_nodes[i]->HasMatchIn(title_words, &title_matches);
    const bool has_url_matches = index_urls_ &&
        query_nodes[i]->HasMatchIn(url_words, &url_matches);
    if (!has_title_matches && !has_url_matches)
      return;
    query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches);
    if (index_urls_)
      query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches);
  }
  BookmarkMatch match;
  if (lower_title.length() == node->GetTitle().length()) {
    // Only use title matches if the lowercase string is the same length
    // as the original string, otherwise the matches are meaningless.
    // TODO(mpearson): revise match positions appropriately.
    match.title_match_positions.swap(title_matches);
  }
  if (index_urls_) {
    // Now that we're done processing this entry, correct the offsets of the
    // matches in |url_matches| so they point to offsets in the original URL
    // spec, not the cleaned-up URL string that we used for matching.
    std::vector<size_t> offsets =
        BookmarkMatch::OffsetsFromMatchPositions(url_matches);
    base::OffsetAdjuster::UnadjustOffsets(adjustments, &offsets);
    url_matches =
        BookmarkMatch::ReplaceOffsetsInMatchPositions(url_matches, offsets);
    match.url_match_positions.swap(url_matches);
  }
  match.node = node;
  results->push_back(match);
}

bool BookmarkIndex::GetBookmarksMatchingTerm(const base::string16& term,
                                             bool first_term,
                                             Matches* matches) {
  Index::const_iterator i = index_.lower_bound(term);
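  // |i| now points at the first index entry that sorts at or after |term|.
  // Because the index is an ordered map keyed by term, every entry that
  // starts with |term| (if any) follows contiguously from here, which is what
  // the prefix-matching loops below rely on.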
  if (i == index_.end())
    return false;

  if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch(term)) {
    // Term is too short for a prefix match; compare using an exact match.
    if (i->first != term)
      return false;  // No bookmarks with this term.

    if (first_term) {
      Match match;
      match.terms.push_back(i);
      matches->push_back(match);
      return true;
    }
    CombineMatchesInPlace(i, matches);
  } else if (first_term) {
    // This is the first term and we're doing a prefix match. Loop through the
    // index adding all entries that start with term to matches.
    while (i != index_.end() &&
           i->first.size() >= term.size() &&
           term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
      Match match;
      match.terms.push_back(i);
      matches->push_back(match);
      ++i;
    }
  } else {
    // Prefix match and not the first term. Loop through the index combining
    // the current matches in |matches| with term, placing the result in
    // |result|.
    Matches result;
    while (i != index_.end() &&
           i->first.size() >= term.size() &&
           term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
      CombineMatches(i, *matches, &result);
      ++i;
    }
    matches->swap(result);
  }
  return !matches->empty();
}

void BookmarkIndex::CombineMatchesInPlace(const Index::const_iterator& index_i,
                                          Matches* matches) {
  for (size_t i = 0; i < matches->size(); ) {
    Match* match = &((*matches)[i]);
    NodeSet intersection;
    std::set_intersection(match->nodes_begin(), match->nodes_end(),
                          index_i->second.begin(), index_i->second.end(),
                          std::inserter(intersection, intersection.begin()));
    if (intersection.empty()) {
      matches->erase(matches->begin() + i);
    } else {
      match->terms.push_back(index_i);
      match->nodes.swap(intersection);
      ++i;
    }
  }
}

void BookmarkIndex::CombineMatches(const Index::const_iterator& index_i,
                                   const Matches& current_matches,
                                   Matches* result) {
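  // Called once per index entry that prefix-matches the current term, so a
  // single existing match can fan out into several combined matches: one for
  // each matching index entry whose node set intersects it.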
  for (size_t i = 0; i < current_matches.size(); ++i) {
    const Match& match = current_matches[i];
    NodeSet intersection;
    std::set_intersection(match.nodes_begin(), match.nodes_end(),
                          index_i->second.begin(), index_i->second.end(),
                          std::inserter(intersection, intersection.begin()));
    if (!intersection.empty()) {
      result->push_back(Match());
      Match& combined_match = result->back();
      combined_match.terms = match.terms;
      combined_match.terms.push_back(index_i);
      combined_match.nodes.swap(intersection);
    }
  }
}

std::vector<base::string16> BookmarkIndex::ExtractQueryWords(
    const base::string16& query) {
  std::vector<base::string16> terms;
  if (query.empty())
    return std::vector<base::string16>();
  query_parser::QueryParser parser;
  parser.ParseQueryWords(base::i18n::ToLower(query), &terms);
  return terms;
}

void BookmarkIndex::RegisterNode(const base::string16& term,
                                 const BookmarkNode* node) {
  index_[term].insert(node);
}

void BookmarkIndex::UnregisterNode(const base::string16& term,
                                   const BookmarkNode* node) {
  Index::iterator i = index_.find(term);
  if (i == index_.end()) {
    // We can get here if the node has the same term more than once. For
    // example, a bookmark with the title 'foo foo' would end up here.
    return;
  }
  i->second.erase(node);
  if (i->second.empty())
    index_.erase(i);
}