| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/history/query_parser.h" | 5 #include "chrome/browser/history/query_parser.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" |
| 10 #include "base/i18n/break_iterator.h" | 10 #include "base/i18n/break_iterator.h" |
| 11 #include "base/i18n/case_conversion.h" | 11 #include "base/i18n/case_conversion.h" |
| 12 #include "base/logging.h" | 12 #include "base/logging.h" |
| 13 #include "base/stl_util.h" | 13 #include "base/stl_util.h" |
| 14 #include "base/strings/utf_string_conversions.h" |
| 14 | 15 |
| 15 namespace { | 16 namespace { |
| 16 | 17 |
| 17 // Returns true if |mp1.first| is less than |mp2.first|. This is used to | 18 // Returns true if |mp1.first| is less than |mp2.first|. This is used to |
| 18 // sort match positions. | 19 // sort match positions. |
| 19 int CompareMatchPosition(const Snippet::MatchPosition& mp1, | 20 int CompareMatchPosition(const Snippet::MatchPosition& mp1, |
| 20 const Snippet::MatchPosition& mp2) { | 21 const Snippet::MatchPosition& mp2) { |
| 21 return mp1.first < mp2.first; | 22 return mp1.first < mp2.first; |
| 22 } | 23 } |
| 23 | 24 |
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 81 | 82 |
| 82 void set_literal(bool literal) { literal_ = literal; } | 83 void set_literal(bool literal) { literal_ = literal; } |
| 83 | 84 |
| 84 // QueryNode: | 85 // QueryNode: |
| 85 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; | 86 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; |
| 86 virtual bool IsWord() const OVERRIDE; | 87 virtual bool IsWord() const OVERRIDE; |
| 87 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; | 88 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; |
| 88 virtual bool HasMatchIn( | 89 virtual bool HasMatchIn( |
| 89 const std::vector<QueryWord>& words, | 90 const std::vector<QueryWord>& words, |
| 90 Snippet::MatchPositions* match_positions) const OVERRIDE; | 91 Snippet::MatchPositions* match_positions) const OVERRIDE; |
| 92 virtual bool HasMatchIn( |
| 93 const std::vector<QueryWord>& words) const OVERRIDE; |
| 91 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; | 94 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; |
| 92 | 95 |
| 93 private: | 96 private: |
| 94 string16 word_; | 97 string16 word_; |
| 95 bool literal_; | 98 bool literal_; |
| 96 | 99 |
| 97 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord); | 100 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord); |
| 98 }; | 101 }; |
| 99 | 102 |
| 100 QueryNodeWord::QueryNodeWord(const string16& word) | 103 QueryNodeWord::QueryNodeWord(const string16& word) |
| (...skipping 30 matching lines...) Expand all Loading... |
| 131 size_t match_start = words[i].position; | 134 size_t match_start = words[i].position; |
| 132 match_positions->push_back( | 135 match_positions->push_back( |
| 133 Snippet::MatchPosition(match_start, | 136 Snippet::MatchPosition(match_start, |
| 134 match_start + static_cast<int>(word_.size()))); | 137 match_start + static_cast<int>(word_.size()))); |
| 135 matched = true; | 138 matched = true; |
| 136 } | 139 } |
| 137 } | 140 } |
| 138 return matched; | 141 return matched; |
| 139 } | 142 } |
| 140 | 143 |
| 144 bool QueryNodeWord::HasMatchIn(const std::vector<QueryWord>& words) const { |
| 145 for (size_t i = 0; i < words.size(); ++i) { |
| 146 if (Matches(words[i].word, false)) |
| 147 return true; |
| 148 } |
| 149 return false; |
| 150 } |
| 151 |
| 141 void QueryNodeWord::AppendWords(std::vector<string16>* words) const { | 152 void QueryNodeWord::AppendWords(std::vector<string16>* words) const { |
| 142 words->push_back(word_); | 153 words->push_back(word_); |
| 143 } | 154 } |
| 144 | 155 |
| 145 // A QueryNodeList has a collection of QueryNodes which are deleted in the end. | 156 // A QueryNodeList has a collection of QueryNodes which are deleted in the end. |
| 146 class QueryNodeList : public QueryNode { | 157 class QueryNodeList : public QueryNode { |
| 147 public: | 158 public: |
| 148 typedef std::vector<QueryNode*> QueryNodeVector; | 159 typedef std::vector<QueryNode*> QueryNodeVector; |
| 149 | 160 |
| 150 QueryNodeList(); | 161 QueryNodeList(); |
| 151 virtual ~QueryNodeList(); | 162 virtual ~QueryNodeList(); |
| 152 | 163 |
| 153 QueryNodeVector* children() { return &children_; } | 164 QueryNodeVector* children() { return &children_; } |
| 154 | 165 |
| 155 void AddChild(QueryNode* node); | 166 void AddChild(QueryNode* node); |
| 156 | 167 |
| 157 // Remove empty subnodes left over from other parsing. | 168 // Remove empty subnodes left over from other parsing. |
| 158 void RemoveEmptySubnodes(); | 169 void RemoveEmptySubnodes(); |
| 159 | 170 |
| 160 // QueryNode: | 171 // QueryNode: |
| 161 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; | 172 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; |
| 162 virtual bool IsWord() const OVERRIDE; | 173 virtual bool IsWord() const OVERRIDE; |
| 163 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; | 174 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; |
| 164 virtual bool HasMatchIn( | 175 virtual bool HasMatchIn( |
| 165 const std::vector<QueryWord>& words, | 176 const std::vector<QueryWord>& words, |
| 166 Snippet::MatchPositions* match_positions) const OVERRIDE; | 177 Snippet::MatchPositions* match_positions) const OVERRIDE; |
| 178 virtual bool HasMatchIn( |
| 179 const std::vector<QueryWord>& words) const OVERRIDE; |
| 167 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; | 180 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; |
| 168 | 181 |
| 169 protected: | 182 protected: |
| 170 int AppendChildrenToString(string16* query) const; | 183 int AppendChildrenToString(string16* query) const; |
| 171 | 184 |
| 172 QueryNodeVector children_; | 185 QueryNodeVector children_; |
| 173 | 186 |
| 174 private: | 187 private: |
| 175 DISALLOW_COPY_AND_ASSIGN(QueryNodeList); | 188 DISALLOW_COPY_AND_ASSIGN(QueryNodeList); |
| 176 }; | 189 }; |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 212 NOTREACHED(); | 225 NOTREACHED(); |
| 213 return false; | 226 return false; |
| 214 } | 227 } |
| 215 | 228 |
| 216 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words, | 229 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words, |
| 217 Snippet::MatchPositions* match_positions) const { | 230 Snippet::MatchPositions* match_positions) const { |
| 218 NOTREACHED(); | 231 NOTREACHED(); |
| 219 return false; | 232 return false; |
| 220 } | 233 } |
| 221 | 234 |
| 235 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words) const { |
| 236 NOTREACHED(); |
| 237 return false; |
| 238 } |
| 239 |
| 222 void QueryNodeList::AppendWords(std::vector<string16>* words) const { | 240 void QueryNodeList::AppendWords(std::vector<string16>* words) const { |
| 223 for (size_t i = 0; i < children_.size(); ++i) | 241 for (size_t i = 0; i < children_.size(); ++i) |
| 224 children_[i]->AppendWords(words); | 242 children_[i]->AppendWords(words); |
| 225 } | 243 } |
| 226 | 244 |
| 227 int QueryNodeList::AppendChildrenToString(string16* query) const { | 245 int QueryNodeList::AppendChildrenToString(string16* query) const { |
| 228 int num_words = 0; | 246 int num_words = 0; |
| 229 for (QueryNodeVector::const_iterator node = children_.begin(); | 247 for (QueryNodeVector::const_iterator node = children_.begin(); |
| 230 node != children_.end(); ++node) { | 248 node != children_.end(); ++node) { |
| 231 if (node != children_.begin()) | 249 if (node != children_.begin()) |
| 232 query->push_back(L' '); | 250 query->push_back(L' '); |
| 233 num_words += (*node)->AppendToSQLiteQuery(query); | 251 num_words += (*node)->AppendToSQLiteQuery(query); |
| 234 } | 252 } |
| 235 return num_words; | 253 return num_words; |
| 236 } | 254 } |
| 237 | 255 |
| 238 // A QueryNodePhrase is a phrase query ("quoted"). | 256 // A QueryNodePhrase is a phrase query ("quoted"). |
| 239 class QueryNodePhrase : public QueryNodeList { | 257 class QueryNodePhrase : public QueryNodeList { |
| 240 public: | 258 public: |
| 241 QueryNodePhrase(); | 259 QueryNodePhrase(); |
| 242 virtual ~QueryNodePhrase(); | 260 virtual ~QueryNodePhrase(); |
| 243 | 261 |
| 244 // QueryNodeList: | 262 // QueryNodeList: |
| 245 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; | 263 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; |
| 246 virtual bool HasMatchIn( | 264 virtual bool HasMatchIn( |
| 247 const std::vector<QueryWord>& words, | 265 const std::vector<QueryWord>& words, |
| 248 Snippet::MatchPositions* match_positions) const OVERRIDE; | 266 Snippet::MatchPositions* match_positions) const OVERRIDE; |
| 267 virtual bool HasMatchIn( |
| 268 const std::vector<QueryWord>& words) const OVERRIDE; |
| 249 | 269 |
| 250 private: | 270 private: |
| 271 bool MatchesAll(const std::vector<QueryWord>& words, |
| 272 const QueryWord** first_word, |
| 273 const QueryWord** last_word) const; |
| 251 DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase); | 274 DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase); |
| 252 }; | 275 }; |
| 253 | 276 |
| 254 QueryNodePhrase::QueryNodePhrase() {} | 277 QueryNodePhrase::QueryNodePhrase() {} |
| 255 | 278 |
| 256 QueryNodePhrase::~QueryNodePhrase() {} | 279 QueryNodePhrase::~QueryNodePhrase() {} |
| 257 | 280 |
| 258 int QueryNodePhrase::AppendToSQLiteQuery(string16* query) const { | 281 int QueryNodePhrase::AppendToSQLiteQuery(string16* query) const { |
| 259 query->push_back(L'"'); | 282 query->push_back(L'"'); |
| 260 int num_words = AppendChildrenToString(query); | 283 int num_words = AppendChildrenToString(query); |
| 261 query->push_back(L'"'); | 284 query->push_back(L'"'); |
| 262 return num_words; | 285 return num_words; |
| 263 } | 286 } |
| 264 | 287 |
| 265 bool QueryNodePhrase::HasMatchIn( | 288 bool QueryNodePhrase::MatchesAll(const std::vector<QueryWord>& words, |
| 266 const std::vector<QueryWord>& words, | 289 const QueryWord** first_word, |
| 267 Snippet::MatchPositions* match_positions) const { | 290 const QueryWord** last_word) const { |
| 268 if (words.size() < children_.size()) | 291 if (words.size() < children_.size()) |
| 269 return false; | 292 return false; |
| 270 | 293 |
| 271 for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) { | 294 for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) { |
| 272 bool matched_all = true; | 295 bool matched_all = true; |
| 273 for (size_t j = 0; j < children_.size(); ++j) { | 296 for (size_t j = 0; j < children_.size(); ++j) { |
| 274 if (!children_[j]->Matches(words[i + j].word, true)) { | 297 if (!children_[j]->Matches(words[i + j].word, true)) { |
| 275 matched_all = false; | 298 matched_all = false; |
| 276 break; | 299 break; |
| 277 } | 300 } |
| 278 } | 301 } |
| 279 if (matched_all) { | 302 if (matched_all) { |
| 280 const QueryWord& last_word = words[i + children_.size() - 1]; | 303 *first_word = &words[i]; |
| 281 match_positions->push_back( | 304 *last_word = &words[i + children_.size() - 1]; |
| 282 Snippet::MatchPosition(words[i].position, | |
| 283 last_word.position + last_word.word.length())); | |
| 284 return true; | 305 return true; |
| 285 } | 306 } |
| 286 } | 307 } |
| 287 return false; | 308 return false; |
| 288 } | 309 } |
| 289 | 310 |
| 311 bool QueryNodePhrase::HasMatchIn( |
| 312 const std::vector<QueryWord>& words, |
| 313 Snippet::MatchPositions* match_positions) const { |
| 314 const QueryWord* first_word; |
| 315 const QueryWord* last_word; |
| 316 |
| 317 if (MatchesAll(words, &first_word, &last_word)) { |
| 318 match_positions->push_back( |
| 319 Snippet::MatchPosition(first_word->position, |
| 320 last_word->position + last_word->word.length())); |
| 321 return true; |
| 322 } |
| 323 return false; |
| 324 } |
| 325 |
| 326 bool QueryNodePhrase::HasMatchIn(const std::vector<QueryWord>& words) const { |
| 327 const QueryWord* first_word; |
| 328 const QueryWord* last_word; |
| 329 return MatchesAll(words, &first_word, &last_word); |
| 330 } |
| 331 |
| 290 QueryParser::QueryParser() {} | 332 QueryParser::QueryParser() {} |
| 291 | 333 |
| 292 // static | 334 // static |
| 293 bool QueryParser::IsWordLongEnoughForPrefixSearch(const string16& word) { | 335 bool QueryParser::IsWordLongEnoughForPrefixSearch(const string16& word) { |
| 294 DCHECK(!word.empty()); | 336 DCHECK(!word.empty()); |
| 295 size_t minimum_length = 3; | 337 size_t minimum_length = 3; |
| 296 // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) | 338 // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) |
| 297 // because they 'behave like' Latin letters. Moreover, we should | 339 // because they 'behave like' Latin letters. Moreover, we should |
| 298 // normalize the former before reaching here. | 340 // normalize the former before reaching here. |
| 299 if (0xAC00 <= word[0] && word[0] <= 0xD7A3) | 341 if (0xAC00 <= word[0] && word[0] <= 0xD7A3) |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 347 // TODO(sky): we need a better way to align the positions so that we don't | 389 // TODO(sky): we need a better way to align the positions so that we don't |
| 348 // completely punt here. | 390 // completely punt here. |
| 349 match_positions->clear(); | 391 match_positions->clear(); |
| 350 } else { | 392 } else { |
| 351 CoalseAndSortMatchPositions(&matches); | 393 CoalseAndSortMatchPositions(&matches); |
| 352 match_positions->swap(matches); | 394 match_positions->swap(matches); |
| 353 } | 395 } |
| 354 return true; | 396 return true; |
| 355 } | 397 } |
| 356 | 398 |
| 399 bool QueryParser::DoesQueryMatch(const std::vector<QueryWord>& query_words, |
| 400 const std::vector<QueryNode*>& query_nodes) { |
| 401 if (query_nodes.empty() || query_words.empty()) |
| 402 return false; |
| 403 |
| 404 for (size_t i = 0; i < query_nodes.size(); ++i) { |
| 405 if (!query_nodes[i]->HasMatchIn(query_words)) |
| 406 return false; |
| 407 } |
| 408 return true; |
| 409 } |
| 410 |
| 357 bool QueryParser::ParseQueryImpl(const string16& query, QueryNodeList* root) { | 411 bool QueryParser::ParseQueryImpl(const string16& query, QueryNodeList* root) { |
| 358 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); | 412 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); |
| 359 // TODO(evanm): support a locale here | 413 // TODO(evanm): support a locale here |
| 360 if (!iter.Init()) | 414 if (!iter.Init()) |
| 361 return false; | 415 return false; |
| 362 | 416 |
| 363 // To handle nesting, we maintain a stack of QueryNodeLists. | 417 // To handle nesting, we maintain a stack of QueryNodeLists. |
| 364 // The last element (back) of the stack contains the current, deepest node. | 418 // The last element (back) of the stack contains the current, deepest node. |
| 365 std::vector<QueryNodeList*> query_stack; | 419 std::vector<QueryNodeList*> query_stack; |
| 366 query_stack.push_back(root); | 420 query_stack.push_back(root); |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 408 if (iter.IsWord()) { | 462 if (iter.IsWord()) { |
| 409 string16 word = iter.GetString(); | 463 string16 word = iter.GetString(); |
| 410 if (!word.empty()) { | 464 if (!word.empty()) { |
| 411 words->push_back(QueryWord()); | 465 words->push_back(QueryWord()); |
| 412 words->back().word = word; | 466 words->back().word = word; |
| 413 words->back().position = iter.prev(); | 467 words->back().position = iter.prev(); |
| 414 } | 468 } |
| 415 } | 469 } |
| 416 } | 470 } |
| 417 } | 471 } |
| OLD | NEW |