Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(137)

Side by Side Diff: chrome/browser/history/query_parser.cc

Issue 16776004: Replace FTS in the history_service with a brute force text search. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fix kSafeRegexWordBoundary for Korean. Created 7 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/query_parser.h" 5 #include "chrome/browser/history/query_parser.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 8
9 #include "base/compiler_specific.h" 9 #include "base/compiler_specific.h"
10 #include "base/i18n/break_iterator.h" 10 #include "base/i18n/break_iterator.h"
11 #include "base/i18n/case_conversion.h" 11 #include "base/i18n/case_conversion.h"
12 #include "base/logging.h" 12 #include "base/logging.h"
13 #include "base/stl_util.h" 13 #include "base/stl_util.h"
14 #include "base/strings/utf_string_conversions.h"
14 15
15 namespace { 16 namespace {
16 17
18 // Chinese/Japanese/Thai languages do not use spaces between words, so we
19 // allow any Chinese/Japanese/Thai character to match as a word boundary.
20 char kSafeRegexWordBoundary[] = "(\\b|[\\u0E00-\\u0E7F]|[\\u4E00-\\u9FFF])";
21
17 // Returns true if |mp1.first| is less than |mp2.first|. This is used to 22 // Returns true if |mp1.first| is less than |mp2.first|. This is used to
18 // sort match positions. 23 // sort match positions.
19 int CompareMatchPosition(const Snippet::MatchPosition& mp1, 24 int CompareMatchPosition(const Snippet::MatchPosition& mp1,
20 const Snippet::MatchPosition& mp2) { 25 const Snippet::MatchPosition& mp2) {
21 return mp1.first < mp2.first; 26 return mp1.first < mp2.first;
22 } 27 }
23 28
24 // Returns true if |mp2| intersects |mp1|. This is intended for use by 29 // Returns true if |mp2| intersects |mp1|. This is intended for use by
25 // CoalesceMatchesFrom and isn't meant as a general intersection comparison 30 // CoalesceMatchesFrom and isn't meant as a general intersection comparison
26 // function. 31 // function.
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
76 public: 81 public:
77 explicit QueryNodeWord(const string16& word); 82 explicit QueryNodeWord(const string16& word);
78 virtual ~QueryNodeWord(); 83 virtual ~QueryNodeWord();
79 84
80 const string16& word() const { return word_; } 85 const string16& word() const { return word_; }
81 86
82 void set_literal(bool literal) { literal_ = literal; } 87 void set_literal(bool literal) { literal_ = literal; }
83 88
84 // QueryNode: 89 // QueryNode:
85 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; 90 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE;
91 virtual void AppendSQLiteRegexpQueries(
92 std::vector<string16>* queries) const OVERRIDE;
86 virtual bool IsWord() const OVERRIDE; 93 virtual bool IsWord() const OVERRIDE;
87 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; 94 virtual bool Matches(const string16& word, bool exact) const OVERRIDE;
88 virtual bool HasMatchIn( 95 virtual bool HasMatchIn(
89 const std::vector<QueryWord>& words, 96 const std::vector<QueryWord>& words,
90 Snippet::MatchPositions* match_positions) const OVERRIDE; 97 Snippet::MatchPositions* match_positions) const OVERRIDE;
91 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; 98 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE;
92 99
93 private: 100 private:
94 string16 word_; 101 string16 word_;
95 bool literal_; 102 bool literal_;
96 103
97 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord); 104 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord);
98 }; 105 };
99 106
100 QueryNodeWord::QueryNodeWord(const string16& word) 107 QueryNodeWord::QueryNodeWord(const string16& word)
101 : word_(word), 108 : word_(word),
102 literal_(false) {} 109 literal_(false) {}
103 110
104 QueryNodeWord::~QueryNodeWord() {} 111 QueryNodeWord::~QueryNodeWord() {}
105 112
106 int QueryNodeWord::AppendToSQLiteQuery(string16* query) const { 113 int QueryNodeWord::AppendToSQLiteQuery(string16* query) const {
107 query->append(word_); 114 query->append(word_);
108 115
109 // Use prefix search if we're not literal and long enough. 116 // Use prefix search if we're not literal and long enough.
110 if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_)) 117 if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_))
111 *query += L'*'; 118 *query += L'*';
112 return 1; 119 return 1;
113 } 120 }
114 121
122 void QueryNodeWord::AppendSQLiteRegexpQueries(
123 std::vector<string16>* queries) const {
124 string16 query = ASCIIToUTF16("(?i).*");
125 query.append(ASCIIToUTF16(kSafeRegexWordBoundary));
126 query.append(word_);
127 if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_))
128 query.append(ASCIIToUTF16("\\w*"));
129 query.append(ASCIIToUTF16(kSafeRegexWordBoundary));
130 query.append(ASCIIToUTF16(".*"));
131
132 queries->push_back(query);
133 }
134
115 bool QueryNodeWord::IsWord() const { 135 bool QueryNodeWord::IsWord() const {
116 return true; 136 return true;
117 } 137 }
118 138
119 bool QueryNodeWord::Matches(const string16& word, bool exact) const { 139 bool QueryNodeWord::Matches(const string16& word, bool exact) const {
120 if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_)) 140 if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_))
121 return word == word_; 141 return word == word_;
122 return word.size() >= word_.size() && 142 return word.size() >= word_.size() &&
123 (word_.compare(0, word_.size(), word, 0, word_.size()) == 0); 143 (word_.compare(0, word_.size(), word, 0, word_.size()) == 0);
124 } 144 }
(...skipping 27 matching lines...) Expand all
152 172
153 QueryNodeVector* children() { return &children_; } 173 QueryNodeVector* children() { return &children_; }
154 174
155 void AddChild(QueryNode* node); 175 void AddChild(QueryNode* node);
156 176
157 // Remove empty subnodes left over from other parsing. 177 // Remove empty subnodes left over from other parsing.
158 void RemoveEmptySubnodes(); 178 void RemoveEmptySubnodes();
159 179
160 // QueryNode: 180 // QueryNode:
161 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; 181 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE;
182 virtual void AppendSQLiteRegexpQueries(
183 std::vector<string16>* queries) const OVERRIDE;
162 virtual bool IsWord() const OVERRIDE; 184 virtual bool IsWord() const OVERRIDE;
163 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; 185 virtual bool Matches(const string16& word, bool exact) const OVERRIDE;
164 virtual bool HasMatchIn( 186 virtual bool HasMatchIn(
165 const std::vector<QueryWord>& words, 187 const std::vector<QueryWord>& words,
166 Snippet::MatchPositions* match_positions) const OVERRIDE; 188 Snippet::MatchPositions* match_positions) const OVERRIDE;
167 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; 189 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE;
168 190
169 protected: 191 protected:
170 int AppendChildrenToString(string16* query) const; 192 int AppendChildrenToString(string16* query) const;
171 193
(...skipping 25 matching lines...) Expand all
197 --i; 219 --i;
198 delete list_node; 220 delete list_node;
199 } 221 }
200 } 222 }
201 } 223 }
202 224
203 int QueryNodeList::AppendToSQLiteQuery(string16* query) const { 225 int QueryNodeList::AppendToSQLiteQuery(string16* query) const {
204 return AppendChildrenToString(query); 226 return AppendChildrenToString(query);
205 } 227 }
206 228
229 void QueryNodeList::AppendSQLiteRegexpQueries(
230 std::vector<string16>* queries) const {
231 for (QueryNodeVector::const_iterator node = children_.begin();
232 node != children_.end(); ++node) {
233 (*node)->AppendSQLiteRegexpQueries(queries);
234 }
235 }
236
207 bool QueryNodeList::IsWord() const { 237 bool QueryNodeList::IsWord() const {
208 return false; 238 return false;
209 } 239 }
210 240
211 bool QueryNodeList::Matches(const string16& word, bool exact) const { 241 bool QueryNodeList::Matches(const string16& word, bool exact) const {
212 NOTREACHED(); 242 NOTREACHED();
213 return false; 243 return false;
214 } 244 }
215 245
216 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words, 246 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words,
(...skipping 19 matching lines...) Expand all
236 } 266 }
237 267
238 // A QueryNodePhrase is a phrase query ("quoted"). 268 // A QueryNodePhrase is a phrase query ("quoted").
239 class QueryNodePhrase : public QueryNodeList { 269 class QueryNodePhrase : public QueryNodeList {
240 public: 270 public:
241 QueryNodePhrase(); 271 QueryNodePhrase();
242 virtual ~QueryNodePhrase(); 272 virtual ~QueryNodePhrase();
243 273
244 // QueryNodeList: 274 // QueryNodeList:
245 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; 275 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE;
276 virtual void AppendSQLiteRegexpQueries(
277 std::vector<string16>* queries) const OVERRIDE;
246 virtual bool HasMatchIn( 278 virtual bool HasMatchIn(
247 const std::vector<QueryWord>& words, 279 const std::vector<QueryWord>& words,
248 Snippet::MatchPositions* match_positions) const OVERRIDE; 280 Snippet::MatchPositions* match_positions) const OVERRIDE;
249 281
250 private: 282 private:
251 DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase); 283 DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase);
252 }; 284 };
253 285
254 QueryNodePhrase::QueryNodePhrase() {} 286 QueryNodePhrase::QueryNodePhrase() {}
255 287
256 QueryNodePhrase::~QueryNodePhrase() {} 288 QueryNodePhrase::~QueryNodePhrase() {}
257 289
258 int QueryNodePhrase::AppendToSQLiteQuery(string16* query) const { 290 int QueryNodePhrase::AppendToSQLiteQuery(string16* query) const {
259 query->push_back(L'"'); 291 query->push_back(L'"');
260 int num_words = AppendChildrenToString(query); 292 int num_words = AppendChildrenToString(query);
261 query->push_back(L'"'); 293 query->push_back(L'"');
262 return num_words; 294 return num_words;
263 } 295 }
264 296
297 void QueryNodePhrase::AppendSQLiteRegexpQueries(
298 std::vector<string16>* queries) const {
299 string16 query = ASCIIToUTF16("(?i).*");
300 query.append(ASCIIToUTF16(kSafeRegexWordBoundary));
301 for (QueryNodeVector::const_iterator node = children_.begin();
302 node != children_.end(); ++node) {
303 (*node)->AppendToSQLiteQuery(&query);
304 if (node + 1 != children_.end()) {
305 // Match any non-word characters within phrase (enables url matching).
306 query.append(ASCIIToUTF16("\\W+"));
307 }
308 }
309 query.append(ASCIIToUTF16(kSafeRegexWordBoundary));
310 query.append(ASCIIToUTF16(".*"));
311 queries->push_back(query);
312 }
313
265 bool QueryNodePhrase::HasMatchIn( 314 bool QueryNodePhrase::HasMatchIn(
266 const std::vector<QueryWord>& words, 315 const std::vector<QueryWord>& words,
267 Snippet::MatchPositions* match_positions) const { 316 Snippet::MatchPositions* match_positions) const {
268 if (words.size() < children_.size()) 317 if (words.size() < children_.size())
269 return false; 318 return false;
270 319
271 for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) { 320 for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) {
272 bool matched_all = true; 321 bool matched_all = true;
273 for (size_t j = 0; j < children_.size(); ++j) { 322 for (size_t j = 0; j < children_.size(); ++j) {
274 if (!children_[j]->Matches(words[i + j].word, true)) { 323 if (!children_[j]->Matches(words[i + j].word, true)) {
(...skipping 26 matching lines...) Expand all
301 return word.size() >= minimum_length; 350 return word.size() >= minimum_length;
302 } 351 }
303 352
304 int QueryParser::ParseQuery(const string16& query, string16* sqlite_query) { 353 int QueryParser::ParseQuery(const string16& query, string16* sqlite_query) {
305 QueryNodeList root; 354 QueryNodeList root;
306 if (!ParseQueryImpl(query, &root)) 355 if (!ParseQueryImpl(query, &root))
307 return 0; 356 return 0;
308 return root.AppendToSQLiteQuery(sqlite_query); 357 return root.AppendToSQLiteQuery(sqlite_query);
309 } 358 }
310 359
360 void QueryParser::ParseQueryAsRegexps(const string16& query,
361 std::vector<string16>* sqlite_queries) {
362 QueryNodeList root;
363 if (!ParseQueryImpl(query, &root))
364 return;
365 return root.AppendSQLiteRegexpQueries(sqlite_queries);
366 }
367
311 void QueryParser::ParseQueryWords(const string16& query, 368 void QueryParser::ParseQueryWords(const string16& query,
312 std::vector<string16>* words) { 369 std::vector<string16>* words) {
313 QueryNodeList root; 370 QueryNodeList root;
314 if (!ParseQueryImpl(query, &root)) 371 if (!ParseQueryImpl(query, &root))
315 return; 372 return;
316 root.AppendWords(words); 373 root.AppendWords(words);
317 } 374 }
318 375
319 void QueryParser::ParseQueryNodes(const string16& query, 376 void QueryParser::ParseQueryNodes(const string16& query,
320 std::vector<QueryNode*>* nodes) { 377 std::vector<QueryNode*>* nodes) {
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
408 if (iter.IsWord()) { 465 if (iter.IsWord()) {
409 string16 word = iter.GetString(); 466 string16 word = iter.GetString();
410 if (!word.empty()) { 467 if (!word.empty()) {
411 words->push_back(QueryWord()); 468 words->push_back(QueryWord());
412 words->back().word = word; 469 words->back().word = word;
413 words->back().position = iter.prev(); 470 words->back().position = iter.prev();
414 } 471 }
415 } 472 }
416 } 473 }
417 } 474 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698