| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/omnibox/browser/url_index_private_data.h" | 5 #include "components/omnibox/browser/url_index_private_data.h" |
| 6 | 6 |
| 7 #include <stdint.h> | 7 #include <stdint.h> |
| 8 | 8 |
| 9 #include <functional> | 9 #include <functional> |
| 10 #include <iterator> | 10 #include <iterator> |
| (...skipping 465 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 476 HistoryIDVector history_ids; | 476 HistoryIDVector history_ids; |
| 477 String16Vector words(unsorted_words); | 477 String16Vector words(unsorted_words); |
| 478 // Sort the words into the longest first as such are likely to narrow down | 478 // Sort the words into the longest first as such are likely to narrow down |
| 479 // the results quicker. Also, single character words are the most expensive | 479 // the results quicker. Also, single character words are the most expensive |
| 480 // to process so save them for last. | 480 // to process so save them for last. |
| 481 std::sort(words.begin(), words.end(), LengthGreater); | 481 std::sort(words.begin(), words.end(), LengthGreater); |
| 482 | 482 |
| 483 // TODO(dyaroshev): write a generic algorithm (crbug.com/696167). | 483 // TODO(dyaroshev): write a generic algorithm (crbug.com/696167). |
| 484 for (String16Vector::iterator iter = words.begin(); iter != words.end(); | 484 for (String16Vector::iterator iter = words.begin(); iter != words.end(); |
| 485 ++iter) { | 485 ++iter) { |
| 486 base::string16 uni_word = *iter; | 486 HistoryIDSet term_history_set = HistoryIDsForTerm(*iter); |
| 487 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); | 487 if (term_history_set.empty()) |
| 488 if (term_history_set.empty()) { | 488 return HistoryIDVector(); |
| 489 history_ids.clear(); | 489 |
| 490 break; | |
| 491 } | |
| 492 if (iter == words.begin()) { | 490 if (iter == words.begin()) { |
| 493 history_ids = {term_history_set.begin(), term_history_set.end()}; | 491 history_ids = {term_history_set.begin(), term_history_set.end()}; |
| 494 } else { | 492 } else { |
| 495 history_ids = base::STLSetIntersection<HistoryIDVector>(history_ids, | 493 history_ids = base::STLSetIntersection<HistoryIDVector>(history_ids, |
| 496 term_history_set); | 494 term_history_set); |
| 497 } | 495 } |
| 498 } | 496 } |
| 499 return history_ids; | 497 return history_ids; |
| 500 } | 498 } |
| 501 | 499 |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 581 return HistoryIDSet(); | 579 return HistoryIDSet(); |
| 582 } | 580 } |
| 583 // Or there may not have been a prefix from which to start. | 581 // Or there may not have been a prefix from which to start. |
| 584 word_id_set = prefix_chars.empty() ? std::move(leftover_set) | 582 word_id_set = prefix_chars.empty() ? std::move(leftover_set) |
| 585 : base::STLSetIntersection<WordIDSet>( | 583 : base::STLSetIntersection<WordIDSet>( |
| 586 word_id_set, leftover_set); | 584 word_id_set, leftover_set); |
| 587 } | 585 } |
| 588 | 586 |
| 589 // We must filter the word list because the resulting word set surely | 587 // We must filter the word list because the resulting word set surely |
| 590 // contains words which do not have the search term as a proper substring. | 588 // contains words which do not have the search term as a proper substring. |
| 591 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); | 589 word_id_set.erase(std::remove_if(word_id_set.begin(), word_id_set.end(), |
| 592 word_set_iter != word_id_set.end(); ) { | 590 [&](WordID word_id) { |
| 593 if (word_list_[*word_set_iter].find(term) == base::string16::npos) | 591 return word_list_[word_id].find(term) == |
| 594 word_set_iter = word_id_set.erase(word_set_iter); | 592 base::string16::npos; |
| 595 else | 593 }), |
| 596 ++word_set_iter; | 594 word_id_set.end()); |
| 597 } | |
| 598 } else { | 595 } else { |
| 599 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); | 596 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); |
| 600 } | 597 } |
| 601 | 598 |
| 602 // If any words resulted then we can compose a set of history IDs by unioning | 599 // If any words resulted then we can compose a set of history IDs by unioning |
| 603 // the sets from each word. | 600 // the sets from each word. |
| 604 HistoryIDSet history_id_set; | 601 // We use |buffer| because it's more efficient to collect everything and then |
| 602 // construct a flat_set than to insert elements one by one. |
| 603 HistoryIDVector buffer; |
| 605 for (WordID word_id : word_id_set) { | 604 for (WordID word_id : word_id_set) { |
| 606 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); | 605 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); |
| 607 if (word_iter != word_id_history_map_.end()) { | 606 if (word_iter != word_id_history_map_.end()) { |
| 608 HistoryIDSet& word_history_id_set(word_iter->second); | 607 HistoryIDSet& word_history_id_set(word_iter->second); |
| 609 history_id_set.insert(word_history_id_set.begin(), | 608 buffer.insert(buffer.end(), word_history_id_set.begin(), |
| 610 word_history_id_set.end()); | 609 word_history_id_set.end()); |
| 611 } | 610 } |
| 612 } | 611 } |
| 612 HistoryIDSet history_id_set(buffer.begin(), buffer.end()); |
| 613 | 613 |
| 614 // Record a new cache entry for this word if the term is longer than | 614 // Record a new cache entry for this word if the term is longer than |
| 615 // a single character. | 615 // a single character. |
| 616 if (term_length > 1) | 616 if (term_length > 1) |
| 617 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); | 617 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); |
| 618 | 618 |
| 619 return history_id_set; | 619 return history_id_set; |
| 620 } | 620 } |
| 621 | 621 |
| 622 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( | 622 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( |
| 623 const Char16Set& term_chars) { | 623 const Char16Set& term_chars) { |
| 624 // TODO(dyaroshev): write a generic algorithm (crbug.com/696167). | 624 // TODO(dyaroshev): write a generic algorithm (crbug.com/696167). |
| 625 | 625 |
| 626 WordIDSet word_id_set; | 626 WordIDSet word_id_set; |
| 627 for (Char16Set::const_iterator c_iter = term_chars.begin(); | 627 for (Char16Set::const_iterator c_iter = term_chars.begin(); |
| 628 c_iter != term_chars.end(); ++c_iter) { | 628 c_iter != term_chars.end(); ++c_iter) { |
| 629 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); | 629 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); |
| 630 if (char_iter == char_word_map_.end()) { | 630 // A character was not found so there are no matching results: bail. |
| 631 // A character was not found so there are no matching results: bail. | 631 if (char_iter == char_word_map_.end()) |
| 632 word_id_set.clear(); | 632 return WordIDSet(); |
| 633 break; | 633 |
| 634 } | 634 const WordIDSet& char_word_id_set(char_iter->second); |
| 635 WordIDSet& char_word_id_set(char_iter->second); | |
| 636 // It is possible for there to no longer be any words associated with | 635 // It is possible for there to no longer be any words associated with |
| 637 // a particular character. Give up in that case. | 636 // a particular character. Give up in that case. |
| 638 if (char_word_id_set.empty()) { | 637 if (char_word_id_set.empty()) |
| 639 word_id_set.clear(); | 638 return WordIDSet(); |
| 640 break; | |
| 641 } | |
| 642 | 639 |
| 643 if (c_iter == term_chars.begin()) { | 640 if (c_iter == term_chars.begin()) { |
| 644 // First character results becomes base set of results. | |
| 645 word_id_set = char_word_id_set; | 641 word_id_set = char_word_id_set; |
| 646 } else { | 642 } else { |
| 647 // Subsequent character results get intersected in. | |
| 648 word_id_set = | 643 word_id_set = |
| 649 base::STLSetIntersection<WordIDSet>(word_id_set, char_word_id_set); | 644 base::STLSetIntersection<WordIDSet>(word_id_set, char_word_id_set); |
| 650 } | 645 } |
| 651 } | 646 } |
| 652 return word_id_set; | 647 return word_id_set; |
| 653 } | 648 } |
| 654 | 649 |
| 655 void URLIndexPrivateData::HistoryIdsToScoredMatches( | 650 void URLIndexPrivateData::HistoryIdsToScoredMatches( |
| 656 HistoryIDVector history_ids, | 651 HistoryIDVector history_ids, |
| 657 const base::string16& lower_raw_string, | 652 const base::string16& lower_raw_string, |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 702 const history::URLRow& hist_item = hist_pos->second.url_row; | 697 const history::URLRow& hist_item = hist_pos->second.url_row; |
| 703 auto starts_pos = word_starts_map_.find(history_id); | 698 auto starts_pos = word_starts_map_.find(history_id); |
| 704 DCHECK(starts_pos != word_starts_map_.end()); | 699 DCHECK(starts_pos != word_starts_map_.end()); |
| 705 ScoredHistoryMatch new_scored_match( | 700 ScoredHistoryMatch new_scored_match( |
| 706 hist_item, hist_pos->second.visits, lower_raw_string, lower_raw_terms, | 701 hist_item, hist_pos->second.visits, lower_raw_string, lower_raw_terms, |
| 707 lower_terms_to_word_starts_offsets, starts_pos->second, | 702 lower_terms_to_word_starts_offsets, starts_pos->second, |
| 708 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()), | 703 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()), |
| 709 num_matches, now); | 704 num_matches, now); |
| 710 // Filter new matches that ended up scoring 0. (These are usually matches | 705 // Filter new matches that ended up scoring 0. (These are usually matches |
| 711 // which didn't match the user's raw terms.) | 706 // which didn't match the user's raw terms.) |
| 712 if (new_scored_match.raw_score != 0) | 707 if (new_scored_match.raw_score > 0) |
| 713 scored_items->push_back(std::move(new_scored_match)); | 708 scored_items->push_back(std::move(new_scored_match)); |
| 714 } | 709 } |
| 715 } | 710 } |
| 716 | 711 |
| 717 // static | 712 // static |
| 718 void URLIndexPrivateData::CalculateWordStartsOffsets( | 713 void URLIndexPrivateData::CalculateWordStartsOffsets( |
| 719 const String16Vector& lower_terms, | 714 const String16Vector& lower_terms, |
| 720 WordStarts* lower_terms_to_word_starts_offsets) { | 715 WordStarts* lower_terms_to_word_starts_offsets) { |
| 721 // Calculate offsets for each term. For instance, the offset for | 716 // Calculate offsets for each term. For instance, the offset for |
| 722 // ".net" should be 1, indicating that the actual word-part of the term | 717 // ".net" should be 1, indicating that the actual word-part of the term |
| (...skipping 558 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1281 // First cut: typed count, visit count, recency. | 1276 // First cut: typed count, visit count, recency. |
| 1282 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks | 1277 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks |
| 1283 // recently visited (within the last 12/24 hours) as highly important. Get | 1278 // recently visited (within the last 12/24 hours) as highly important. Get |
| 1284 // input from mpearson. | 1279 // input from mpearson. |
| 1285 if (r1.typed_count() != r2.typed_count()) | 1280 if (r1.typed_count() != r2.typed_count()) |
| 1286 return (r1.typed_count() > r2.typed_count()); | 1281 return (r1.typed_count() > r2.typed_count()); |
| 1287 if (r1.visit_count() != r2.visit_count()) | 1282 if (r1.visit_count() != r2.visit_count()) |
| 1288 return (r1.visit_count() > r2.visit_count()); | 1283 return (r1.visit_count() > r2.visit_count()); |
| 1289 return (r1.last_visit() > r2.last_visit()); | 1284 return (r1.last_visit() > r2.last_visit()); |
| 1290 } | 1285 } |
| OLD | NEW |