Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(46)

Side by Side Diff: chrome/browser/history/query_parser.cc

Issue 16776004: Replace FTS in the history_service with a brute force text search. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fix some typos Created 7 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/query_parser.h" 5 #include "chrome/browser/history/query_parser.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 8
9 #include "base/compiler_specific.h" 9 #include "base/compiler_specific.h"
10 #include "base/i18n/break_iterator.h" 10 #include "base/i18n/break_iterator.h"
11 #include "base/i18n/case_conversion.h" 11 #include "base/i18n/case_conversion.h"
12 #include "base/logging.h" 12 #include "base/logging.h"
13 #include "base/stl_util.h" 13 #include "base/stl_util.h"
14 #include "base/strings/utf_string_conversions.h"
14 15
15 namespace { 16 namespace {
16 17
17 // Returns true if |mp1.first| is less than |mp2.first|. This is used to 18 // Returns true if |mp1.first| is less than |mp2.first|. This is used to
18 // sort match positions. 19 // sort match positions.
19 int CompareMatchPosition(const Snippet::MatchPosition& mp1, 20 int CompareMatchPosition(const Snippet::MatchPosition& mp1,
20 const Snippet::MatchPosition& mp2) { 21 const Snippet::MatchPosition& mp2) {
21 return mp1.first < mp2.first; 22 return mp1.first < mp2.first;
22 } 23 }
23 24
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
81 82
82 void set_literal(bool literal) { literal_ = literal; } 83 void set_literal(bool literal) { literal_ = literal; }
83 84
84 // QueryNode: 85 // QueryNode:
85 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; 86 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE;
86 virtual bool IsWord() const OVERRIDE; 87 virtual bool IsWord() const OVERRIDE;
87 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; 88 virtual bool Matches(const string16& word, bool exact) const OVERRIDE;
88 virtual bool HasMatchIn( 89 virtual bool HasMatchIn(
89 const std::vector<QueryWord>& words, 90 const std::vector<QueryWord>& words,
90 Snippet::MatchPositions* match_positions) const OVERRIDE; 91 Snippet::MatchPositions* match_positions) const OVERRIDE;
92 virtual bool HasMatchIn(
93 const std::vector<QueryWord>& words) const OVERRIDE;
91 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; 94 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE;
92 95
93 private: 96 private:
94 string16 word_; 97 string16 word_;
95 bool literal_; 98 bool literal_;
96 99
97 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord); 100 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord);
98 }; 101 };
99 102
100 QueryNodeWord::QueryNodeWord(const string16& word) 103 QueryNodeWord::QueryNodeWord(const string16& word)
(...skipping 30 matching lines...) Expand all
131 size_t match_start = words[i].position; 134 size_t match_start = words[i].position;
132 match_positions->push_back( 135 match_positions->push_back(
133 Snippet::MatchPosition(match_start, 136 Snippet::MatchPosition(match_start,
134 match_start + static_cast<int>(word_.size()))); 137 match_start + static_cast<int>(word_.size())));
135 matched = true; 138 matched = true;
136 } 139 }
137 } 140 }
138 return matched; 141 return matched;
139 } 142 }
140 143
144 bool QueryNodeWord::HasMatchIn(const std::vector<QueryWord>& words) const {
145 for (size_t i = 0; i < words.size(); ++i) {
146 if (Matches(words[i].word, false))
147 return true;
148 }
149 return false;
150 }
151
141 void QueryNodeWord::AppendWords(std::vector<string16>* words) const { 152 void QueryNodeWord::AppendWords(std::vector<string16>* words) const {
142 words->push_back(word_); 153 words->push_back(word_);
143 } 154 }
144 155
145 // A QueryNodeList has a collection of QueryNodes which are deleted in the end. 156 // A QueryNodeList has a collection of QueryNodes which are deleted in the end.
146 class QueryNodeList : public QueryNode { 157 class QueryNodeList : public QueryNode {
147 public: 158 public:
148 typedef std::vector<QueryNode*> QueryNodeVector; 159 typedef std::vector<QueryNode*> QueryNodeVector;
149 160
150 QueryNodeList(); 161 QueryNodeList();
151 virtual ~QueryNodeList(); 162 virtual ~QueryNodeList();
152 163
153 QueryNodeVector* children() { return &children_; } 164 QueryNodeVector* children() { return &children_; }
154 165
155 void AddChild(QueryNode* node); 166 void AddChild(QueryNode* node);
156 167
157 // Remove empty subnodes left over from other parsing. 168 // Remove empty subnodes left over from other parsing.
158 void RemoveEmptySubnodes(); 169 void RemoveEmptySubnodes();
159 170
160 // QueryNode: 171 // QueryNode:
161 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; 172 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE;
162 virtual bool IsWord() const OVERRIDE; 173 virtual bool IsWord() const OVERRIDE;
163 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; 174 virtual bool Matches(const string16& word, bool exact) const OVERRIDE;
164 virtual bool HasMatchIn( 175 virtual bool HasMatchIn(
165 const std::vector<QueryWord>& words, 176 const std::vector<QueryWord>& words,
166 Snippet::MatchPositions* match_positions) const OVERRIDE; 177 Snippet::MatchPositions* match_positions) const OVERRIDE;
178 virtual bool HasMatchIn(
179 const std::vector<QueryWord>& words) const OVERRIDE;
167 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; 180 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE;
168 181
169 protected: 182 protected:
170 int AppendChildrenToString(string16* query) const; 183 int AppendChildrenToString(string16* query) const;
171 184
172 QueryNodeVector children_; 185 QueryNodeVector children_;
173 186
174 private: 187 private:
175 DISALLOW_COPY_AND_ASSIGN(QueryNodeList); 188 DISALLOW_COPY_AND_ASSIGN(QueryNodeList);
176 }; 189 };
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
212 NOTREACHED(); 225 NOTREACHED();
213 return false; 226 return false;
214 } 227 }
215 228
216 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words, 229 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words,
217 Snippet::MatchPositions* match_positions) const { 230 Snippet::MatchPositions* match_positions) const {
218 NOTREACHED(); 231 NOTREACHED();
219 return false; 232 return false;
220 } 233 }
221 234
235 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words) const {
236 NOTREACHED();
237 return false;
238 }
239
222 void QueryNodeList::AppendWords(std::vector<string16>* words) const { 240 void QueryNodeList::AppendWords(std::vector<string16>* words) const {
223 for (size_t i = 0; i < children_.size(); ++i) 241 for (size_t i = 0; i < children_.size(); ++i)
224 children_[i]->AppendWords(words); 242 children_[i]->AppendWords(words);
225 } 243 }
226 244
227 int QueryNodeList::AppendChildrenToString(string16* query) const { 245 int QueryNodeList::AppendChildrenToString(string16* query) const {
228 int num_words = 0; 246 int num_words = 0;
229 for (QueryNodeVector::const_iterator node = children_.begin(); 247 for (QueryNodeVector::const_iterator node = children_.begin();
230 node != children_.end(); ++node) { 248 node != children_.end(); ++node) {
231 if (node != children_.begin()) 249 if (node != children_.begin())
232 query->push_back(L' '); 250 query->push_back(L' ');
233 num_words += (*node)->AppendToSQLiteQuery(query); 251 num_words += (*node)->AppendToSQLiteQuery(query);
234 } 252 }
235 return num_words; 253 return num_words;
236 } 254 }
237 255
238 // A QueryNodePhrase is a phrase query ("quoted"). 256 // A QueryNodePhrase is a phrase query ("quoted").
239 class QueryNodePhrase : public QueryNodeList { 257 class QueryNodePhrase : public QueryNodeList {
240 public: 258 public:
241 QueryNodePhrase(); 259 QueryNodePhrase();
242 virtual ~QueryNodePhrase(); 260 virtual ~QueryNodePhrase();
243 261
244 // QueryNodeList: 262 // QueryNodeList:
245 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; 263 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE;
246 virtual bool HasMatchIn( 264 virtual bool HasMatchIn(
247 const std::vector<QueryWord>& words, 265 const std::vector<QueryWord>& words,
248 Snippet::MatchPositions* match_positions) const OVERRIDE; 266 Snippet::MatchPositions* match_positions) const OVERRIDE;
267 virtual bool HasMatchIn(
268 const std::vector<QueryWord>& words) const OVERRIDE;
249 269
250 private: 270 private:
271 bool MatchesAll(const std::vector<QueryWord>& words,
272 const QueryWord** first_word,
273 const QueryWord** last_word) const;
251 DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase); 274 DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase);
252 }; 275 };
253 276
254 QueryNodePhrase::QueryNodePhrase() {} 277 QueryNodePhrase::QueryNodePhrase() {}
255 278
256 QueryNodePhrase::~QueryNodePhrase() {} 279 QueryNodePhrase::~QueryNodePhrase() {}
257 280
258 int QueryNodePhrase::AppendToSQLiteQuery(string16* query) const { 281 int QueryNodePhrase::AppendToSQLiteQuery(string16* query) const {
259 query->push_back(L'"'); 282 query->push_back(L'"');
260 int num_words = AppendChildrenToString(query); 283 int num_words = AppendChildrenToString(query);
261 query->push_back(L'"'); 284 query->push_back(L'"');
262 return num_words; 285 return num_words;
263 } 286 }
264 287
265 bool QueryNodePhrase::HasMatchIn( 288 bool QueryNodePhrase::MatchesAll(const std::vector<QueryWord>& words,
266 const std::vector<QueryWord>& words, 289 const QueryWord** first_word,
267 Snippet::MatchPositions* match_positions) const { 290 const QueryWord** last_word) const {
268 if (words.size() < children_.size()) 291 if (words.size() < children_.size())
269 return false; 292 return false;
270 293
271 for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) { 294 for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) {
272 bool matched_all = true; 295 bool matched_all = true;
273 for (size_t j = 0; j < children_.size(); ++j) { 296 for (size_t j = 0; j < children_.size(); ++j) {
274 if (!children_[j]->Matches(words[i + j].word, true)) { 297 if (!children_[j]->Matches(words[i + j].word, true)) {
275 matched_all = false; 298 matched_all = false;
276 break; 299 break;
277 } 300 }
278 } 301 }
279 if (matched_all) { 302 if (matched_all) {
280 const QueryWord& last_word = words[i + children_.size() - 1]; 303 *first_word = &words[i];
281 match_positions->push_back( 304 *last_word = &words[i + children_.size() - 1];
282 Snippet::MatchPosition(words[i].position,
283 last_word.position + last_word.word.length()));
284 return true; 305 return true;
285 } 306 }
286 } 307 }
287 return false; 308 return false;
288 } 309 }
289 310
311 bool QueryNodePhrase::HasMatchIn(
312 const std::vector<QueryWord>& words,
313 Snippet::MatchPositions* match_positions) const {
314 const QueryWord* first_word;
315 const QueryWord* last_word;
316
317 if (MatchesAll(words, &first_word, &last_word)) {
318 match_positions->push_back(
319 Snippet::MatchPosition(first_word->position,
320 last_word->position + last_word->word.length()));
321 return true;
322 }
323 return false;
324 }
325
326 bool QueryNodePhrase::HasMatchIn(const std::vector<QueryWord>& words) const {
327 const QueryWord* first_word;
328 const QueryWord* last_word;
329 return MatchesAll(words, &first_word, &last_word);
330 }
331
290 QueryParser::QueryParser() {} 332 QueryParser::QueryParser() {}
291 333
292 // static 334 // static
293 bool QueryParser::IsWordLongEnoughForPrefixSearch(const string16& word) { 335 bool QueryParser::IsWordLongEnoughForPrefixSearch(const string16& word) {
294 DCHECK(!word.empty()); 336 DCHECK(!word.empty());
295 size_t minimum_length = 3; 337 size_t minimum_length = 3;
296 // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) 338 // We intentionally exclude Hangul Jamos (both Conjoining and compatibility)
297 // because they 'behave like' Latin letters. Moreover, we should 339 // because they 'behave like' Latin letters. Moreover, we should
298 // normalize the former before reaching here. 340 // normalize the former before reaching here.
299 if (0xAC00 <= word[0] && word[0] <= 0xD7A3) 341 if (0xAC00 <= word[0] && word[0] <= 0xD7A3)
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
347 // TODO(sky): we need a better way to align the positions so that we don't 389 // TODO(sky): we need a better way to align the positions so that we don't
348 // completely punt here. 390 // completely punt here.
349 match_positions->clear(); 391 match_positions->clear();
350 } else { 392 } else {
351 CoalseAndSortMatchPositions(&matches); 393 CoalseAndSortMatchPositions(&matches);
352 match_positions->swap(matches); 394 match_positions->swap(matches);
353 } 395 }
354 return true; 396 return true;
355 } 397 }
356 398
399 bool QueryParser::DoesQueryMatch(const std::vector<QueryWord>& query_words,
400 const std::vector<QueryNode*>& query_nodes) {
401 if (query_nodes.empty() || query_words.empty())
402 return false;
403
404 for (size_t i = 0; i < query_nodes.size(); ++i) {
405 if (!query_nodes[i]->HasMatchIn(query_words))
406 return false;
407 }
408 return true;
409 }
410
357 bool QueryParser::ParseQueryImpl(const string16& query, QueryNodeList* root) { 411 bool QueryParser::ParseQueryImpl(const string16& query, QueryNodeList* root) {
358 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); 412 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD);
359 // TODO(evanm): support a locale here 413 // TODO(evanm): support a locale here
360 if (!iter.Init()) 414 if (!iter.Init())
361 return false; 415 return false;
362 416
363 // To handle nesting, we maintain a stack of QueryNodeLists. 417 // To handle nesting, we maintain a stack of QueryNodeLists.
364 // The last element (back) of the stack contains the current, deepest node. 418 // The last element (back) of the stack contains the current, deepest node.
365 std::vector<QueryNodeList*> query_stack; 419 std::vector<QueryNodeList*> query_stack;
366 query_stack.push_back(root); 420 query_stack.push_back(root);
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
408 if (iter.IsWord()) { 462 if (iter.IsWord()) {
409 string16 word = iter.GetString(); 463 string16 word = iter.GetString();
410 if (!word.empty()) { 464 if (!word.empty()) {
411 words->push_back(QueryWord()); 465 words->push_back(QueryWord());
412 words->back().word = word; 466 words->back().word = word;
413 words->back().position = iter.prev(); 467 words->back().position = iter.prev();
414 } 468 }
415 } 469 }
416 } 470 }
417 } 471 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698