| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/history/query_parser.h" | 5 #include "chrome/browser/history/query_parser.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" |
| 10 #include "base/i18n/break_iterator.h" | 10 #include "base/i18n/break_iterator.h" |
| 11 #include "base/i18n/case_conversion.h" | 11 #include "base/i18n/case_conversion.h" |
| 12 #include "base/logging.h" | 12 #include "base/logging.h" |
| 13 #include "base/stl_util.h" | 13 #include "base/stl_util.h" |
| 14 #include "base/strings/utf_string_conversions.h" |
| 14 | 15 |
| 15 namespace { | 16 namespace { |
| 16 | 17 |
| 18 // Chinese/Japanese/Thai languages do not use spaces between words, so we |
| 19 // allow any Chinese/Japanese/Thai character to match as a word boundary. |
| 20 char kSafeRegexWordBoundary[] = "(\\b|[\\u0E00-\\u0E7F]|[\\u4E00-\\u9FFF])"; |
| 21 |
| 17 // Returns true if |mp1.first| is less than |mp2.first|. This is used to | 22 // Returns true if |mp1.first| is less than |mp2.first|. This is used to |
| 18 // sort match positions. | 23 // sort match positions. |
| 19 int CompareMatchPosition(const Snippet::MatchPosition& mp1, | 24 int CompareMatchPosition(const Snippet::MatchPosition& mp1, |
| 20 const Snippet::MatchPosition& mp2) { | 25 const Snippet::MatchPosition& mp2) { |
| 21 return mp1.first < mp2.first; | 26 return mp1.first < mp2.first; |
| 22 } | 27 } |
| 23 | 28 |
| 24 // Returns true if |mp2| intersects |mp1|. This is intended for use by | 29 // Returns true if |mp2| intersects |mp1|. This is intended for use by |
| 25 // CoalesceMatchesFrom and isn't meant as a general intersection comparison | 30 // CoalesceMatchesFrom and isn't meant as a general intersection comparison |
| 26 // function. | 31 // function. |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 76 public: | 81 public: |
| 77 explicit QueryNodeWord(const string16& word); | 82 explicit QueryNodeWord(const string16& word); |
| 78 virtual ~QueryNodeWord(); | 83 virtual ~QueryNodeWord(); |
| 79 | 84 |
| 80 const string16& word() const { return word_; } | 85 const string16& word() const { return word_; } |
| 81 | 86 |
| 82 void set_literal(bool literal) { literal_ = literal; } | 87 void set_literal(bool literal) { literal_ = literal; } |
| 83 | 88 |
| 84 // QueryNode: | 89 // QueryNode: |
| 85 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; | 90 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; |
| 91 virtual void AppendSQLiteRegexpQueries( |
| 92 std::vector<string16>* queries) const OVERRIDE; |
| 86 virtual bool IsWord() const OVERRIDE; | 93 virtual bool IsWord() const OVERRIDE; |
| 87 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; | 94 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; |
| 88 virtual bool HasMatchIn( | 95 virtual bool HasMatchIn( |
| 89 const std::vector<QueryWord>& words, | 96 const std::vector<QueryWord>& words, |
| 90 Snippet::MatchPositions* match_positions) const OVERRIDE; | 97 Snippet::MatchPositions* match_positions) const OVERRIDE; |
| 91 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; | 98 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; |
| 92 | 99 |
| 93 private: | 100 private: |
| 94 string16 word_; | 101 string16 word_; |
| 95 bool literal_; | 102 bool literal_; |
| 96 | 103 |
| 97 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord); | 104 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord); |
| 98 }; | 105 }; |
| 99 | 106 |
| 100 QueryNodeWord::QueryNodeWord(const string16& word) | 107 QueryNodeWord::QueryNodeWord(const string16& word) |
| 101 : word_(word), | 108 : word_(word), |
| 102 literal_(false) {} | 109 literal_(false) {} |
| 103 | 110 |
| 104 QueryNodeWord::~QueryNodeWord() {} | 111 QueryNodeWord::~QueryNodeWord() {} |
| 105 | 112 |
| 106 int QueryNodeWord::AppendToSQLiteQuery(string16* query) const { | 113 int QueryNodeWord::AppendToSQLiteQuery(string16* query) const { |
| 107 query->append(word_); | 114 query->append(word_); |
| 108 | 115 |
| 109 // Use prefix search if we're not literal and long enough. | 116 // Use prefix search if we're not literal and long enough. |
| 110 if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_)) | 117 if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_)) |
| 111 *query += L'*'; | 118 *query += L'*'; |
| 112 return 1; | 119 return 1; |
| 113 } | 120 } |
| 114 | 121 |
| 122 void QueryNodeWord::AppendSQLiteRegexpQueries( |
| 123 std::vector<string16>* queries) const { |
| 124 string16 query = ASCIIToUTF16("(?i).*"); |
| 125 query.append(ASCIIToUTF16(kSafeRegexWordBoundary)); |
| 126 query.append(word_); |
| 127 if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_)) |
| 128 query.append(ASCIIToUTF16("\\w*")); |
| 129 query.append(ASCIIToUTF16(kSafeRegexWordBoundary)); |
| 130 query.append(ASCIIToUTF16(".*")); |
| 131 |
| 132 queries->push_back(query); |
| 133 } |
| 134 |
| 115 bool QueryNodeWord::IsWord() const { | 135 bool QueryNodeWord::IsWord() const { |
| 116 return true; | 136 return true; |
| 117 } | 137 } |
| 118 | 138 |
| 119 bool QueryNodeWord::Matches(const string16& word, bool exact) const { | 139 bool QueryNodeWord::Matches(const string16& word, bool exact) const { |
| 120 if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_)) | 140 if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_)) |
| 121 return word == word_; | 141 return word == word_; |
| 122 return word.size() >= word_.size() && | 142 return word.size() >= word_.size() && |
| 123 (word_.compare(0, word_.size(), word, 0, word_.size()) == 0); | 143 (word_.compare(0, word_.size(), word, 0, word_.size()) == 0); |
| 124 } | 144 } |
| (...skipping 27 matching lines...) Expand all Loading... |
| 152 | 172 |
| 153 QueryNodeVector* children() { return &children_; } | 173 QueryNodeVector* children() { return &children_; } |
| 154 | 174 |
| 155 void AddChild(QueryNode* node); | 175 void AddChild(QueryNode* node); |
| 156 | 176 |
| 157 // Remove empty subnodes left over from other parsing. | 177 // Remove empty subnodes left over from other parsing. |
| 158 void RemoveEmptySubnodes(); | 178 void RemoveEmptySubnodes(); |
| 159 | 179 |
| 160 // QueryNode: | 180 // QueryNode: |
| 161 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; | 181 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; |
| 182 virtual void AppendSQLiteRegexpQueries( |
| 183 std::vector<string16>* queries) const OVERRIDE; |
| 162 virtual bool IsWord() const OVERRIDE; | 184 virtual bool IsWord() const OVERRIDE; |
| 163 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; | 185 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; |
| 164 virtual bool HasMatchIn( | 186 virtual bool HasMatchIn( |
| 165 const std::vector<QueryWord>& words, | 187 const std::vector<QueryWord>& words, |
| 166 Snippet::MatchPositions* match_positions) const OVERRIDE; | 188 Snippet::MatchPositions* match_positions) const OVERRIDE; |
| 167 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; | 189 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; |
| 168 | 190 |
| 169 protected: | 191 protected: |
| 170 int AppendChildrenToString(string16* query) const; | 192 int AppendChildrenToString(string16* query) const; |
| 171 | 193 |
| (...skipping 25 matching lines...) Expand all Loading... |
| 197 --i; | 219 --i; |
| 198 delete list_node; | 220 delete list_node; |
| 199 } | 221 } |
| 200 } | 222 } |
| 201 } | 223 } |
| 202 | 224 |
| 203 int QueryNodeList::AppendToSQLiteQuery(string16* query) const { | 225 int QueryNodeList::AppendToSQLiteQuery(string16* query) const { |
| 204 return AppendChildrenToString(query); | 226 return AppendChildrenToString(query); |
| 205 } | 227 } |
| 206 | 228 |
| 229 void QueryNodeList::AppendSQLiteRegexpQueries( |
| 230 std::vector<string16>* queries) const { |
| 231 for (QueryNodeVector::const_iterator node = children_.begin(); |
| 232 node != children_.end(); ++node) { |
| 233 (*node)->AppendSQLiteRegexpQueries(queries); |
| 234 } |
| 235 } |
| 236 |
| 207 bool QueryNodeList::IsWord() const { | 237 bool QueryNodeList::IsWord() const { |
| 208 return false; | 238 return false; |
| 209 } | 239 } |
| 210 | 240 |
| 211 bool QueryNodeList::Matches(const string16& word, bool exact) const { | 241 bool QueryNodeList::Matches(const string16& word, bool exact) const { |
| 212 NOTREACHED(); | 242 NOTREACHED(); |
| 213 return false; | 243 return false; |
| 214 } | 244 } |
| 215 | 245 |
| 216 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words, | 246 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words, |
| (...skipping 19 matching lines...) Expand all Loading... |
| 236 } | 266 } |
| 237 | 267 |
| 238 // A QueryNodePhrase is a phrase query ("quoted"). | 268 // A QueryNodePhrase is a phrase query ("quoted"). |
| 239 class QueryNodePhrase : public QueryNodeList { | 269 class QueryNodePhrase : public QueryNodeList { |
| 240 public: | 270 public: |
| 241 QueryNodePhrase(); | 271 QueryNodePhrase(); |
| 242 virtual ~QueryNodePhrase(); | 272 virtual ~QueryNodePhrase(); |
| 243 | 273 |
| 244 // QueryNodeList: | 274 // QueryNodeList: |
| 245 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; | 275 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; |
| 276 virtual void AppendSQLiteRegexpQueries( |
| 277 std::vector<string16>* queries) const OVERRIDE; |
| 246 virtual bool HasMatchIn( | 278 virtual bool HasMatchIn( |
| 247 const std::vector<QueryWord>& words, | 279 const std::vector<QueryWord>& words, |
| 248 Snippet::MatchPositions* match_positions) const OVERRIDE; | 280 Snippet::MatchPositions* match_positions) const OVERRIDE; |
| 249 | 281 |
| 250 private: | 282 private: |
| 251 DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase); | 283 DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase); |
| 252 }; | 284 }; |
| 253 | 285 |
| 254 QueryNodePhrase::QueryNodePhrase() {} | 286 QueryNodePhrase::QueryNodePhrase() {} |
| 255 | 287 |
| 256 QueryNodePhrase::~QueryNodePhrase() {} | 288 QueryNodePhrase::~QueryNodePhrase() {} |
| 257 | 289 |
| 258 int QueryNodePhrase::AppendToSQLiteQuery(string16* query) const { | 290 int QueryNodePhrase::AppendToSQLiteQuery(string16* query) const { |
| 259 query->push_back(L'"'); | 291 query->push_back(L'"'); |
| 260 int num_words = AppendChildrenToString(query); | 292 int num_words = AppendChildrenToString(query); |
| 261 query->push_back(L'"'); | 293 query->push_back(L'"'); |
| 262 return num_words; | 294 return num_words; |
| 263 } | 295 } |
| 264 | 296 |
| 297 void QueryNodePhrase::AppendSQLiteRegexpQueries( |
| 298 std::vector<string16>* queries) const { |
| 299 string16 query = ASCIIToUTF16("(?i).*"); |
| 300 query.append(ASCIIToUTF16(kSafeRegexWordBoundary)); |
| 301 for (QueryNodeVector::const_iterator node = children_.begin(); |
| 302 node != children_.end(); ++node) { |
| 303 (*node)->AppendToSQLiteQuery(&query); |
| 304 if (node + 1 != children_.end()) { |
| 305 // Match any non-word characters within phrase (enables url matching). |
| 306 query.append(ASCIIToUTF16("\\W+")); |
| 307 } |
| 308 } |
| 309 query.append(ASCIIToUTF16(kSafeRegexWordBoundary)); |
| 310 query.append(ASCIIToUTF16(".*")); |
| 311 queries->push_back(query); |
| 312 } |
| 313 |
| 265 bool QueryNodePhrase::HasMatchIn( | 314 bool QueryNodePhrase::HasMatchIn( |
| 266 const std::vector<QueryWord>& words, | 315 const std::vector<QueryWord>& words, |
| 267 Snippet::MatchPositions* match_positions) const { | 316 Snippet::MatchPositions* match_positions) const { |
| 268 if (words.size() < children_.size()) | 317 if (words.size() < children_.size()) |
| 269 return false; | 318 return false; |
| 270 | 319 |
| 271 for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) { | 320 for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) { |
| 272 bool matched_all = true; | 321 bool matched_all = true; |
| 273 for (size_t j = 0; j < children_.size(); ++j) { | 322 for (size_t j = 0; j < children_.size(); ++j) { |
| 274 if (!children_[j]->Matches(words[i + j].word, true)) { | 323 if (!children_[j]->Matches(words[i + j].word, true)) { |
| (...skipping 26 matching lines...) Expand all Loading... |
| 301 return word.size() >= minimum_length; | 350 return word.size() >= minimum_length; |
| 302 } | 351 } |
| 303 | 352 |
| 304 int QueryParser::ParseQuery(const string16& query, string16* sqlite_query) { | 353 int QueryParser::ParseQuery(const string16& query, string16* sqlite_query) { |
| 305 QueryNodeList root; | 354 QueryNodeList root; |
| 306 if (!ParseQueryImpl(query, &root)) | 355 if (!ParseQueryImpl(query, &root)) |
| 307 return 0; | 356 return 0; |
| 308 return root.AppendToSQLiteQuery(sqlite_query); | 357 return root.AppendToSQLiteQuery(sqlite_query); |
| 309 } | 358 } |
| 310 | 359 |
| 360 void QueryParser::ParseQueryAsRegexps(const string16& query, |
| 361 std::vector<string16>* sqlite_queries) { |
| 362 QueryNodeList root; |
| 363 if (!ParseQueryImpl(query, &root)) |
| 364 return; |
| 365 return root.AppendSQLiteRegexpQueries(sqlite_queries); |
| 366 } |
| 367 |
| 311 void QueryParser::ParseQueryWords(const string16& query, | 368 void QueryParser::ParseQueryWords(const string16& query, |
| 312 std::vector<string16>* words) { | 369 std::vector<string16>* words) { |
| 313 QueryNodeList root; | 370 QueryNodeList root; |
| 314 if (!ParseQueryImpl(query, &root)) | 371 if (!ParseQueryImpl(query, &root)) |
| 315 return; | 372 return; |
| 316 root.AppendWords(words); | 373 root.AppendWords(words); |
| 317 } | 374 } |
| 318 | 375 |
| 319 void QueryParser::ParseQueryNodes(const string16& query, | 376 void QueryParser::ParseQueryNodes(const string16& query, |
| 320 std::vector<QueryNode*>* nodes) { | 377 std::vector<QueryNode*>* nodes) { |
| (...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 408 if (iter.IsWord()) { | 465 if (iter.IsWord()) { |
| 409 string16 word = iter.GetString(); | 466 string16 word = iter.GetString(); |
| 410 if (!word.empty()) { | 467 if (!word.empty()) { |
| 411 words->push_back(QueryWord()); | 468 words->push_back(QueryWord()); |
| 412 words->back().word = word; | 469 words->back().word = word; |
| 413 words->back().position = iter.prev(); | 470 words->back().position = iter.prev(); |
| 414 } | 471 } |
| 415 } | 472 } |
| 416 } | 473 } |
| 417 } | 474 } |
| OLD | NEW |