components/bookmarks/browser/bookmark_index.cc - Issue 2569333003: Rename BookmarkIndex to TitledUrlIndex and BookmarkMatch to TitledUrlMatch

Side by Side Diff: components/bookmarks/browser/bookmark_index.cc

Issue 2569333003: Rename BookmarkIndex to TitledUrlIndex and BookmarkMatch to TitledUrlMatch (Closed)

Patch Set: Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « components/bookmarks/browser/bookmark_index.h ('k') | components/bookmarks/browser/bookmark_index_unittest.cc » ('j') | components/bookmarks/browser/titled_url_index.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "components/bookmarks/browser/bookmark_index.h"

6

7 #include <stdint.h>

8

9 #include "base/i18n/case_conversion.h"

10 #include "base/logging.h"

11 #include "base/stl_util.h"

12 #include "base/strings/utf_offset_string_conversions.h"

13 #include "build/build_config.h"

14 #include "components/bookmarks/browser/bookmark_match.h"

15 #include "components/bookmarks/browser/bookmark_utils.h"

16 #include "components/bookmarks/browser/titled_url_node.h"

17 #include "components/bookmarks/browser/titled_url_node_sorter.h"

18 #include "components/query_parser/snippet.h"

19 #include "third_party/icu/source/common/unicode/normalizer2.h"

20 #include "third_party/icu/source/common/unicode/utypes.h"

21

22 namespace bookmarks {

23

24 namespace {

25

26 // Returns a normalized version of the UTF16 string \|text\|. If it fails to

27 // normalize the string, returns \|text\| itself as a best-effort.

28 base::string16 Normalize(const base::string16& text) {

29 UErrorCode status = U_ZERO_ERROR;

30 const icu::Normalizer2* normalizer2 =

31 icu::Normalizer2::getInstance(nullptr, "nfkc", UNORM2_COMPOSE, status);

32 if (U_FAILURE(status)) {

33 // Log and crash right away to capture the error code in the crash report.

34 LOG(FATAL) << "failed to create a normalizer: " << u_errorName(status);

35 }

36 icu::UnicodeString unicode_text(

37 text.data(), static_cast<int32_t>(text.length()));

38 icu::UnicodeString unicode_normalized_text;

39 normalizer2->normalize(unicode_text, unicode_normalized_text, status);

40 if (U_FAILURE(status)) {

41 // This should not happen. Log the error and fall back.

42 LOG(ERROR) << "normalization failed: " << u_errorName(status);

43 return text;

44 }

45 return base::string16(unicode_normalized_text.getBuffer(),

46 unicode_normalized_text.length());

47 }

48

49 } // namespace

50

51 BookmarkIndex::BookmarkIndex(std::unique_ptr<TitledUrlNodeSorter> sorter)

52 : sorter_(std::move(sorter)) {

53 }

54

55 BookmarkIndex::~BookmarkIndex() {

56 }

57

58 void BookmarkIndex::Add(const TitledUrlNode* node) {

59 std::vector<base::string16> terms =

60 ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle()));

61 for (size_t i = 0; i < terms.size(); ++i)

62 RegisterNode(terms[i], node);

63 terms = ExtractQueryWords(

64 CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), nullptr));

65 for (size_t i = 0; i < terms.size(); ++i)

66 RegisterNode(terms[i], node);

67 }

68

69 void BookmarkIndex::Remove(const TitledUrlNode* node) {

70 std::vector<base::string16> terms =

71 ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle()));

72 for (size_t i = 0; i < terms.size(); ++i)

73 UnregisterNode(terms[i], node);

74 terms = ExtractQueryWords(

75 CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), nullptr));

76 for (size_t i = 0; i < terms.size(); ++i)

77 UnregisterNode(terms[i], node);

78 }

79

80 void BookmarkIndex::GetResultsMatching(

81 const base::string16& input_query,

82 size_t max_count,

83 query_parser::MatchingAlgorithm matching_algorithm,

84 std::vector<BookmarkMatch>* results) {

85 const base::string16 query = Normalize(input_query);

86 std::vector<base::string16> terms = ExtractQueryWords(query);

87 if (terms.empty())

88 return;

89

90 TitledUrlNodeSet matches;

91 for (size_t i = 0; i < terms.size(); ++i) {

92 if (!GetResultsMatchingTerm(terms[i], i == 0, matching_algorithm,

93 &matches)) {

94 return;

95 }

96 }

97

98 TitledUrlNodes sorted_nodes;

99 SortMatches(matches, &sorted_nodes);

100

101 // We use a QueryParser to fill in match positions for us. It's not the most

102 // efficient way to go about this, but by the time we get here we know what

103 // matches and so this shouldn't be performance critical.

104 query_parser::QueryParser parser;

105 query_parser::QueryNodeVector query_nodes;

106 parser.ParseQueryNodes(query, matching_algorithm, &query_nodes);

107

108 // The highest typed counts should be at the beginning of the results vector

109 // so that the best matches will always be included in the results. The loop

110 // that calculates result relevance in HistoryContentsProvider::ConvertResults

111 // will run backwards to assure higher relevance will be attributed to the

112 // best matches.

113 for (TitledUrlNodes::const_iterator i = sorted_nodes.begin();

114 i != sorted_nodes.end() && results->size() < max_count;

115 ++i)

116 AddMatchToResults(*i, &parser, query_nodes, results);

117 }

118

119 void BookmarkIndex::SortMatches(const TitledUrlNodeSet& matches,

120 TitledUrlNodes* sorted_nodes) const {

121 if (sorter_) {

122 sorter_->SortMatches(matches, sorted_nodes);

123 } else {

124 sorted_nodes->insert(sorted_nodes->end(), matches.begin(), matches.end());

125 }

126 }

127

128 void BookmarkIndex::AddMatchToResults(

129 const TitledUrlNode* node,

130 query_parser::QueryParser* parser,

131 const query_parser::QueryNodeVector& query_nodes,

132 std::vector<BookmarkMatch>* results) {

133 if (!node) {

134 return;

135 }

136 // Check that the result matches the query. The previous search

137 // was a simple per-word search, while the more complex matching

138 // of QueryParser may filter it out. For example, the query

139 // ["thi"] will match the title [Thinking], but since

140 // ["thi"] is quoted we don't want to do a prefix match.

141 query_parser::QueryWordVector title_words, url_words;

142 const base::string16 lower_title =

143 base::i18n::ToLower(Normalize(node->GetTitledUrlNodeTitle()));

144 parser->ExtractQueryWords(lower_title, &title_words);

145 base::OffsetAdjuster::Adjustments adjustments;

146 parser->ExtractQueryWords(

147 CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), &adjustments),

148 &url_words);

149 query_parser::Snippet::MatchPositions title_matches, url_matches;

150 for (const auto& node : query_nodes) {

151 const bool has_title_matches =

152 node->HasMatchIn(title_words, &title_matches);

153 const bool has_url_matches = node->HasMatchIn(url_words, &url_matches);

154 if (!has_title_matches && !has_url_matches)

155 return;

156 query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches);

157 query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches);

158 }

159 BookmarkMatch match;

160 if (lower_title.length() == node->GetTitledUrlNodeTitle().length()) {

161 // Only use title matches if the lowercase string is the same length

162 // as the original string, otherwise the matches are meaningless.

163 // TODO(mpearson): revise match positions appropriately.

164 match.title_match_positions.swap(title_matches);

165 }

166 // Now that we're done processing this entry, correct the offsets of the

167 // matches in \|url_matches\| so they point to offsets in the original URL

168 // spec, not the cleaned-up URL string that we used for matching.

169 std::vector<size_t> offsets =

170 BookmarkMatch::OffsetsFromMatchPositions(url_matches);

171 base::OffsetAdjuster::UnadjustOffsets(adjustments, &offsets);

172 url_matches =

173 BookmarkMatch::ReplaceOffsetsInMatchPositions(url_matches, offsets);

174 match.url_match_positions.swap(url_matches);

175 match.node = node;

176 results->push_back(match);

177 }

178

179 bool BookmarkIndex::GetResultsMatchingTerm(

180 const base::string16& term,

181 bool first_term,

182 query_parser::MatchingAlgorithm matching_algorithm,

183 TitledUrlNodeSet* matches) {

184 Index::const_iterator i = index_.lower_bound(term);

185 if (i == index_.end())

186 return false;

187

188 if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch(

189 term, matching_algorithm)) {

190 // Term is too short for prefix match, compare using exact match.

191 if (i->first != term)

192 return false; // No title/URL pairs with this term.

193

194 if (first_term) {

195 (*matches) = i->second;

196 return true;

197 }

198 matches = base::STLSetIntersection<TitledUrlNodeSet>(i->second, matches);

199 } else {

200 // Loop through index adding all entries that start with term to

201 // \|prefix_matches\|.

202 TitledUrlNodeSet tmp_prefix_matches;

203 // If this is the first term, then store the result directly in \|matches\|

204 // to avoid calling stl intersection (which requires a copy).

205 TitledUrlNodeSet* prefix_matches =

206 first_term ? matches : &tmp_prefix_matches;

207 while (i != index_.end() &&

208 i->first.size() >= term.size() &&

209 term.compare(0, term.size(), i->first, 0, term.size()) == 0) {

210 #if !defined(OS_ANDROID)

211 prefix_matches->insert(i->second.begin(), i->second.end());

212 #else

213 // Work around a bug in the implementation of std::set::insert in the STL

214 // used on android (http://crbug.com/367050).

215 for (TitledUrlNodeSet::const_iterator n = i->second.begin();

216 n != i->second.end();

217 ++n)

218 prefix_matches->insert(prefix_matches->end(), *n);

219 #endif

220 ++i;

221 }

222 if (!first_term) {

223 *matches =

224 base::STLSetIntersection<TitledUrlNodeSet>(prefix_matches, matches);

225 }

226 }

227 return !matches->empty();

228 }

229

230 std::vector<base::string16> BookmarkIndex::ExtractQueryWords(

231 const base::string16& query) {

232 std::vector<base::string16> terms;

233 if (query.empty())

234 return std::vector<base::string16>();

235 query_parser::QueryParser parser;

236 parser.ParseQueryWords(base::i18n::ToLower(query),

237 query_parser::MatchingAlgorithm::DEFAULT,

238 &terms);

239 return terms;

240 }

241

242 void BookmarkIndex::RegisterNode(const base::string16& term,

243 const TitledUrlNode* node) {

244 index_[term].insert(node);

245 }

246

247 void BookmarkIndex::UnregisterNode(const base::string16& term,

248 const TitledUrlNode* node) {

249 Index::iterator i = index_.find(term);

250 if (i == index_.end()) {

251 // We can get here if the node has the same term more than once. For

252 // example, a node with the title 'foo foo' would end up here.

253 return;

254 }

255 i->second.erase(node);

256 if (i->second.empty())

257 index_.erase(i);

258 }

259

260 } // namespace bookmarks

OLD	NEW