OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/query_parser.h" | 5 #include "chrome/browser/history/query_parser.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" |
10 #include "base/i18n/break_iterator.h" | 10 #include "base/i18n/break_iterator.h" |
11 #include "base/i18n/case_conversion.h" | 11 #include "base/i18n/case_conversion.h" |
12 #include "base/logging.h" | 12 #include "base/logging.h" |
13 #include "base/stl_util.h" | 13 #include "base/stl_util.h" |
| 14 #include "base/strings/utf_string_conversions.h" |
14 | 15 |
15 namespace { | 16 namespace { |
16 | 17 |
| 18 // Chinese/Japanese/Thai languages do not use spaces between words, so we |
| 19 // allow any Chinese/Japanese/Thai character to match as a word boundary. |
| 20 char kSafeRegexWordBoundary[] = "(\\b|[\\u0E00-\\u0E7F]|[\\u4E00-\\u9FFF])"; |
| 21 |
17 // Returns true if |mp1.first| is less than |mp2.first|. This is used to | 22 // Returns true if |mp1.first| is less than |mp2.first|. This is used to |
18 // sort match positions. | 23 // sort match positions. |
19 int CompareMatchPosition(const Snippet::MatchPosition& mp1, | 24 int CompareMatchPosition(const Snippet::MatchPosition& mp1, |
20 const Snippet::MatchPosition& mp2) { | 25 const Snippet::MatchPosition& mp2) { |
21 return mp1.first < mp2.first; | 26 return mp1.first < mp2.first; |
22 } | 27 } |
23 | 28 |
24 // Returns true if |mp2| intersects |mp1|. This is intended for use by | 29 // Returns true if |mp2| intersects |mp1|. This is intended for use by |
25 // CoalesceMatchesFrom and isn't meant as a general intersection comparison | 30 // CoalesceMatchesFrom and isn't meant as a general intersection comparison |
26 // function. | 31 // function. |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
76 public: | 81 public: |
77 explicit QueryNodeWord(const string16& word); | 82 explicit QueryNodeWord(const string16& word); |
78 virtual ~QueryNodeWord(); | 83 virtual ~QueryNodeWord(); |
79 | 84 |
80 const string16& word() const { return word_; } | 85 const string16& word() const { return word_; } |
81 | 86 |
82 void set_literal(bool literal) { literal_ = literal; } | 87 void set_literal(bool literal) { literal_ = literal; } |
83 | 88 |
84 // QueryNode: | 89 // QueryNode: |
85 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; | 90 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; |
| 91 virtual void AppendSQLiteRegexpQueries( |
| 92 std::vector<string16>* queries) const OVERRIDE; |
86 virtual bool IsWord() const OVERRIDE; | 93 virtual bool IsWord() const OVERRIDE; |
87 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; | 94 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; |
88 virtual bool HasMatchIn( | 95 virtual bool HasMatchIn( |
89 const std::vector<QueryWord>& words, | 96 const std::vector<QueryWord>& words, |
90 Snippet::MatchPositions* match_positions) const OVERRIDE; | 97 Snippet::MatchPositions* match_positions) const OVERRIDE; |
91 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; | 98 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; |
92 | 99 |
93 private: | 100 private: |
94 string16 word_; | 101 string16 word_; |
95 bool literal_; | 102 bool literal_; |
96 | 103 |
97 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord); | 104 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord); |
98 }; | 105 }; |
99 | 106 |
100 QueryNodeWord::QueryNodeWord(const string16& word) | 107 QueryNodeWord::QueryNodeWord(const string16& word) |
101 : word_(word), | 108 : word_(word), |
102 literal_(false) {} | 109 literal_(false) {} |
103 | 110 |
104 QueryNodeWord::~QueryNodeWord() {} | 111 QueryNodeWord::~QueryNodeWord() {} |
105 | 112 |
106 int QueryNodeWord::AppendToSQLiteQuery(string16* query) const { | 113 int QueryNodeWord::AppendToSQLiteQuery(string16* query) const { |
107 query->append(word_); | 114 query->append(word_); |
108 | 115 |
109 // Use prefix search if we're not literal and long enough. | 116 // Use prefix search if we're not literal and long enough. |
110 if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_)) | 117 if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_)) |
111 *query += L'*'; | 118 *query += L'*'; |
112 return 1; | 119 return 1; |
113 } | 120 } |
114 | 121 |
| 122 void QueryNodeWord::AppendSQLiteRegexpQueries( |
| 123 std::vector<string16>* queries) const { |
| 124 string16 query = ASCIIToUTF16("(?i).*"); |
| 125 query.append(ASCIIToUTF16(kSafeRegexWordBoundary)); |
| 126 query.append(word_); |
| 127 if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_)) |
| 128 query.append(ASCIIToUTF16("\\w*")); |
| 129 query.append(ASCIIToUTF16(kSafeRegexWordBoundary)); |
| 130 query.append(ASCIIToUTF16(".*")); |
| 131 |
| 132 queries->push_back(query); |
| 133 } |
| 134 |
115 bool QueryNodeWord::IsWord() const { | 135 bool QueryNodeWord::IsWord() const { |
116 return true; | 136 return true; |
117 } | 137 } |
118 | 138 |
119 bool QueryNodeWord::Matches(const string16& word, bool exact) const { | 139 bool QueryNodeWord::Matches(const string16& word, bool exact) const { |
120 if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_)) | 140 if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_)) |
121 return word == word_; | 141 return word == word_; |
122 return word.size() >= word_.size() && | 142 return word.size() >= word_.size() && |
123 (word_.compare(0, word_.size(), word, 0, word_.size()) == 0); | 143 (word_.compare(0, word_.size(), word, 0, word_.size()) == 0); |
124 } | 144 } |
(...skipping 27 matching lines...) Expand all Loading... |
152 | 172 |
153 QueryNodeVector* children() { return &children_; } | 173 QueryNodeVector* children() { return &children_; } |
154 | 174 |
155 void AddChild(QueryNode* node); | 175 void AddChild(QueryNode* node); |
156 | 176 |
157 // Remove empty subnodes left over from other parsing. | 177 // Remove empty subnodes left over from other parsing. |
158 void RemoveEmptySubnodes(); | 178 void RemoveEmptySubnodes(); |
159 | 179 |
160 // QueryNode: | 180 // QueryNode: |
161 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; | 181 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; |
| 182 virtual void AppendSQLiteRegexpQueries( |
| 183 std::vector<string16>* queries) const OVERRIDE; |
162 virtual bool IsWord() const OVERRIDE; | 184 virtual bool IsWord() const OVERRIDE; |
163 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; | 185 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; |
164 virtual bool HasMatchIn( | 186 virtual bool HasMatchIn( |
165 const std::vector<QueryWord>& words, | 187 const std::vector<QueryWord>& words, |
166 Snippet::MatchPositions* match_positions) const OVERRIDE; | 188 Snippet::MatchPositions* match_positions) const OVERRIDE; |
167 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; | 189 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; |
168 | 190 |
169 protected: | 191 protected: |
170 int AppendChildrenToString(string16* query) const; | 192 int AppendChildrenToString(string16* query) const; |
171 | 193 |
(...skipping 25 matching lines...) Expand all Loading... |
197 --i; | 219 --i; |
198 delete list_node; | 220 delete list_node; |
199 } | 221 } |
200 } | 222 } |
201 } | 223 } |
202 | 224 |
203 int QueryNodeList::AppendToSQLiteQuery(string16* query) const { | 225 int QueryNodeList::AppendToSQLiteQuery(string16* query) const { |
204 return AppendChildrenToString(query); | 226 return AppendChildrenToString(query); |
205 } | 227 } |
206 | 228 |
| 229 void QueryNodeList::AppendSQLiteRegexpQueries( |
| 230 std::vector<string16>* queries) const { |
| 231 for (QueryNodeVector::const_iterator node = children_.begin(); |
| 232 node != children_.end(); ++node) { |
| 233 (*node)->AppendSQLiteRegexpQueries(queries); |
| 234 } |
| 235 } |
| 236 |
207 bool QueryNodeList::IsWord() const { | 237 bool QueryNodeList::IsWord() const { |
208 return false; | 238 return false; |
209 } | 239 } |
210 | 240 |
211 bool QueryNodeList::Matches(const string16& word, bool exact) const { | 241 bool QueryNodeList::Matches(const string16& word, bool exact) const { |
212 NOTREACHED(); | 242 NOTREACHED(); |
213 return false; | 243 return false; |
214 } | 244 } |
215 | 245 |
216 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words, | 246 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words, |
(...skipping 19 matching lines...) Expand all Loading... |
236 } | 266 } |
237 | 267 |
238 // A QueryNodePhrase is a phrase query ("quoted"). | 268 // A QueryNodePhrase is a phrase query ("quoted"). |
239 class QueryNodePhrase : public QueryNodeList { | 269 class QueryNodePhrase : public QueryNodeList { |
240 public: | 270 public: |
241 QueryNodePhrase(); | 271 QueryNodePhrase(); |
242 virtual ~QueryNodePhrase(); | 272 virtual ~QueryNodePhrase(); |
243 | 273 |
244 // QueryNodeList: | 274 // QueryNodeList: |
245 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; | 275 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; |
| 276 virtual void AppendSQLiteRegexpQueries( |
| 277 std::vector<string16>* queries) const OVERRIDE; |
246 virtual bool HasMatchIn( | 278 virtual bool HasMatchIn( |
247 const std::vector<QueryWord>& words, | 279 const std::vector<QueryWord>& words, |
248 Snippet::MatchPositions* match_positions) const OVERRIDE; | 280 Snippet::MatchPositions* match_positions) const OVERRIDE; |
249 | 281 |
250 private: | 282 private: |
251 DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase); | 283 DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase); |
252 }; | 284 }; |
253 | 285 |
254 QueryNodePhrase::QueryNodePhrase() {} | 286 QueryNodePhrase::QueryNodePhrase() {} |
255 | 287 |
256 QueryNodePhrase::~QueryNodePhrase() {} | 288 QueryNodePhrase::~QueryNodePhrase() {} |
257 | 289 |
258 int QueryNodePhrase::AppendToSQLiteQuery(string16* query) const { | 290 int QueryNodePhrase::AppendToSQLiteQuery(string16* query) const { |
259 query->push_back(L'"'); | 291 query->push_back(L'"'); |
260 int num_words = AppendChildrenToString(query); | 292 int num_words = AppendChildrenToString(query); |
261 query->push_back(L'"'); | 293 query->push_back(L'"'); |
262 return num_words; | 294 return num_words; |
263 } | 295 } |
264 | 296 |
| 297 void QueryNodePhrase::AppendSQLiteRegexpQueries( |
| 298 std::vector<string16>* queries) const { |
| 299 string16 query = ASCIIToUTF16("(?i).*"); |
| 300 query.append(ASCIIToUTF16(kSafeRegexWordBoundary)); |
| 301 for (QueryNodeVector::const_iterator node = children_.begin(); |
| 302 node != children_.end(); ++node) { |
| 303 (*node)->AppendToSQLiteQuery(&query); |
| 304 if (node + 1 != children_.end()) { |
| 305 // Match any non-word characters within phrase (enables url matching). |
| 306 query.append(ASCIIToUTF16("\\W+")); |
| 307 } |
| 308 } |
| 309 query.append(ASCIIToUTF16(kSafeRegexWordBoundary)); |
| 310 query.append(ASCIIToUTF16(".*")); |
| 311 queries->push_back(query); |
| 312 } |
| 313 |
265 bool QueryNodePhrase::HasMatchIn( | 314 bool QueryNodePhrase::HasMatchIn( |
266 const std::vector<QueryWord>& words, | 315 const std::vector<QueryWord>& words, |
267 Snippet::MatchPositions* match_positions) const { | 316 Snippet::MatchPositions* match_positions) const { |
268 if (words.size() < children_.size()) | 317 if (words.size() < children_.size()) |
269 return false; | 318 return false; |
270 | 319 |
271 for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) { | 320 for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) { |
272 bool matched_all = true; | 321 bool matched_all = true; |
273 for (size_t j = 0; j < children_.size(); ++j) { | 322 for (size_t j = 0; j < children_.size(); ++j) { |
274 if (!children_[j]->Matches(words[i + j].word, true)) { | 323 if (!children_[j]->Matches(words[i + j].word, true)) { |
(...skipping 26 matching lines...) Expand all Loading... |
301 return word.size() >= minimum_length; | 350 return word.size() >= minimum_length; |
302 } | 351 } |
303 | 352 |
304 int QueryParser::ParseQuery(const string16& query, string16* sqlite_query) { | 353 int QueryParser::ParseQuery(const string16& query, string16* sqlite_query) { |
305 QueryNodeList root; | 354 QueryNodeList root; |
306 if (!ParseQueryImpl(query, &root)) | 355 if (!ParseQueryImpl(query, &root)) |
307 return 0; | 356 return 0; |
308 return root.AppendToSQLiteQuery(sqlite_query); | 357 return root.AppendToSQLiteQuery(sqlite_query); |
309 } | 358 } |
310 | 359 |
| 360 void QueryParser::ParseQueryAsRegexps(const string16& query, |
| 361 std::vector<string16>* sqlite_queries) { |
| 362 QueryNodeList root; |
| 363 if (!ParseQueryImpl(query, &root)) |
| 364 return; |
| 365 return root.AppendSQLiteRegexpQueries(sqlite_queries); |
| 366 } |
| 367 |
311 void QueryParser::ParseQueryWords(const string16& query, | 368 void QueryParser::ParseQueryWords(const string16& query, |
312 std::vector<string16>* words) { | 369 std::vector<string16>* words) { |
313 QueryNodeList root; | 370 QueryNodeList root; |
314 if (!ParseQueryImpl(query, &root)) | 371 if (!ParseQueryImpl(query, &root)) |
315 return; | 372 return; |
316 root.AppendWords(words); | 373 root.AppendWords(words); |
317 } | 374 } |
318 | 375 |
319 void QueryParser::ParseQueryNodes(const string16& query, | 376 void QueryParser::ParseQueryNodes(const string16& query, |
320 std::vector<QueryNode*>* nodes) { | 377 std::vector<QueryNode*>* nodes) { |
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
408 if (iter.IsWord()) { | 465 if (iter.IsWord()) { |
409 string16 word = iter.GetString(); | 466 string16 word = iter.GetString(); |
410 if (!word.empty()) { | 467 if (!word.empty()) { |
411 words->push_back(QueryWord()); | 468 words->push_back(QueryWord()); |
412 words->back().word = word; | 469 words->back().word = word; |
413 words->back().position = iter.prev(); | 470 words->back().position = iter.prev(); |
414 } | 471 } |
415 } | 472 } |
416 } | 473 } |
417 } | 474 } |
OLD | NEW |