| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/history/url_index_private_data.h" | 5 #include "chrome/browser/history/url_index_private_data.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <functional> | 8 #include <functional> |
| 9 #include <iterator> | 9 #include <iterator> |
| 10 #include <limits> | 10 #include <limits> |
| (...skipping 25 matching lines...) Expand all Loading... |
| 36 CharWordMapEntry; | 36 CharWordMapEntry; |
| 37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem | 37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem |
| 38 WordIDHistoryMapItem; | 38 WordIDHistoryMapItem; |
| 39 typedef imui:: | 39 typedef imui:: |
| 40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry | 40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry |
| 41 WordIDHistoryMapEntry; | 41 WordIDHistoryMapEntry; |
| 42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; | 42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; |
| 43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry | 43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry |
| 44 HistoryInfoMapEntry; | 44 HistoryInfoMapEntry; |
| 45 | 45 |
| 46 // The maximum score any candidate result can achieve. |
| 47 const int kMaxTotalScore = 1425; |
| 48 |
| 46 // Score ranges used to get a 'base' score for each of the scoring factors | 49 // Score ranges used to get a 'base' score for each of the scoring factors |
| 47 // (such as recency of last visit, times visited, times the URL was typed, | 50 // (such as recency of last visit, times visited, times the URL was typed, |
| 48 // and the quality of the string match). There is a matching value range for | 51 // and the quality of the string match). There is a matching value range for |
| 49 // each of these scores for each factor. | 52 // each of these scores for each factor. Note that the top score is greater |
| 50 const int kScoreRank[] = { 1425, 1200, 900, 400 }; | 53 // than |kMaxTotalScore|. The score for each candidate will be capped in the |
| 54 // final calculation. |
| 55 const int kScoreRank[] = { 1450, 1200, 900, 400 }; |
| 51 | 56 |
| 52 // SearchTermCacheItem --------------------------------------------------------- | 57 // SearchTermCacheItem --------------------------------------------------------- |
| 53 | 58 |
| 54 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem( | 59 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem( |
| 55 const WordIDSet& word_id_set, | 60 const WordIDSet& word_id_set, |
| 56 const HistoryIDSet& history_id_set) | 61 const HistoryIDSet& history_id_set) |
| 57 : word_id_set_(word_id_set), | 62 : word_id_set_(word_id_set), |
| 58 history_id_set_(history_id_set), | 63 history_id_set_(history_id_set), |
| 59 used_(true) {} | 64 used_(true) {} |
| 60 | 65 |
| (...skipping 494 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 555 std::min(match.title_matches.size(), terms.size()) / terms.size(); | 560 std::min(match.title_matches.size(), terms.size()) / terms.size(); |
| 556 // Arbitrarily pick the best. | 561 // Arbitrarily pick the best. |
| 557 // TODO(mrossetti): It might make sense that a term which appears in both the | 562 // TODO(mrossetti): It might make sense that a term which appears in both the |
| 558 // URL and the Title should boost the score a bit. | 563 // URL and the Title should boost the score a bit. |
| 559 int term_score = std::max(url_score, title_score); | 564 int term_score = std::max(url_score, title_score); |
| 560 if (term_score == 0) | 565 if (term_score == 0) |
| 561 return match; | 566 return match; |
| 562 | 567 |
| 563 // Determine scoring factors for the recency of visit, visit count and typed | 568 // Determine scoring factors for the recency of visit, visit count and typed |
| 564 // count attributes of the URLRow. | 569 // count attributes of the URLRow. |
| 565 const int kDaysAgoLevel[] = { 0, 10, 20, 30 }; | 570 const int kDaysAgoLevel[] = { 1, 10, 20, 30 }; |
| 566 int days_ago_value = ScoreForValue((base::Time::Now() - | 571 int days_ago_value = ScoreForValue((base::Time::Now() - |
| 567 row.last_visit()).InDays(), kDaysAgoLevel); | 572 row.last_visit()).InDays(), kDaysAgoLevel); |
| 568 const int kVisitCountLevel[] = { 30, 10, 5, 3 }; | 573 const int kVisitCountLevel[] = { 30, 10, 5, 3 }; |
| 569 int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel); | 574 int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel); |
| 570 const int kTypedCountLevel[] = { 10, 5, 3, 1 }; | 575 const int kTypedCountLevel[] = { 20, 10, 3, 1 }; |
| 571 int typed_count_value = ScoreForValue(row.typed_count(), kTypedCountLevel); | 576 int typed_count_value = ScoreForValue(row.typed_count(), kTypedCountLevel); |
| 572 | 577 |
| 573 // The final raw score is calculated by: | 578 // The final raw score is calculated by: |
| 574 // - accumulating each contributing factor, some of which are added more | 579 // - multiplying each factor by a 'relevance' |
| 575 // than once giving them more 'influence' on the final score (currently, | 580 // - calculating the average. |
| 576 // visit_count_value is added twice and typed_count_value three times) | 581 const int kTermScoreRelevance = 1; |
| 577 // - dropping the lowest scores (|kInsignificantFactors|) | 582 const int kDaysAgoRelevance = 2; |
| 578 // - dividing by the remaining significant factors | 583 const int kVisitCountRelevance = 3; |
| 579 // This approach allows emphasis on more relevant factors while reducing the | 584 const int kTypedCountRelevance = 4; |
| 580 // inordinate impact of low scoring factors. | 585 match.raw_score = term_score * kTermScoreRelevance + |
| 581 int factor[] = {term_score, days_ago_value, visit_count_value, | 586 days_ago_value * kDaysAgoRelevance + |
| 582 visit_count_value, typed_count_value, typed_count_value, | 587 visit_count_value * kVisitCountRelevance + |
| 583 typed_count_value}; | 588 typed_count_value * kTypedCountRelevance; |
| 584 const int kInsignificantFactors = 2; | 589 match.raw_score /= (kTermScoreRelevance + kDaysAgoRelevance + |
| 585 const int kSignificantFactors = arraysize(factor) - kInsignificantFactors; | 590 kVisitCountRelevance + kTypedCountRelevance); |
| 586 std::partial_sort(factor, factor + kSignificantFactors, | 591 match.raw_score = std::min(kMaxTotalScore, match.raw_score); |
| 587 factor + arraysize(factor), std::greater<int>()); | |
| 588 for (int i = 0; i < kSignificantFactors; ++i) | |
| 589 match.raw_score += factor[i]; | |
| 590 match.raw_score /= kSignificantFactors; | |
| 591 | 592 |
| 592 return match; | 593 return match; |
| 593 } | 594 } |
| 594 | 595 |
| 595 int URLIndexPrivateData::ScoreComponentForMatches(const TermMatches& matches, | 596 int URLIndexPrivateData::ScoreComponentForMatches(const TermMatches& matches, |
| 596 size_t max_length) { | 597 size_t max_length) { |
| 597 // TODO(mrossetti): This is good enough for now but must be fine-tuned. | |
| 598 if (matches.empty()) | 598 if (matches.empty()) |
| 599 return 0; | 599 return 0; |
| 600 | 600 |
| 601 // Score component for whether the input terms (if more than one) were found | 601 // Score component for whether the input terms (if more than one) were found |
| 602 // in the same order in the match. Start with kOrderMaxValue points divided | 602 // in the same order in the match. Start with kOrderMaxValue points divided |
| 603 // equally among (number of terms - 1); then discount each of those terms that | 603 // equally among (number of terms - 1); then discount each of those terms that |
| 604 // is out-of-order in the match. | 604 // is out-of-order in the match. |
| 605 const int kOrderMaxValue = 250; | 605 const int kOrderMaxValue = 1000; |
| 606 int order_value = kOrderMaxValue; | 606 int order_value = kOrderMaxValue; |
| 607 if (matches.size() > 1) { | 607 if (matches.size() > 1) { |
| 608 int max_possible_out_of_order = matches.size() - 1; | 608 int max_possible_out_of_order = matches.size() - 1; |
| 609 int out_of_order = 0; | 609 int out_of_order = 0; |
| 610 for (size_t i = 1; i < matches.size(); ++i) { | 610 for (size_t i = 1; i < matches.size(); ++i) { |
| 611 if (matches[i - 1].term_num > matches[i].term_num) | 611 if (matches[i - 1].term_num > matches[i].term_num) |
| 612 ++out_of_order; | 612 ++out_of_order; |
| 613 } | 613 } |
| 614 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue / | 614 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue / |
| 615 max_possible_out_of_order; | 615 max_possible_out_of_order; |
| 616 } | 616 } |
| 617 | 617 |
| 618 // Score component for how early in the match string the first search term | 618 // Score component for how early in the match string the first search term |
| 619 // appears. Start with kStartMaxValue points and discount by | 619 // appears. Start with kStartMaxValue points and discount by |
| 620 // 1/kMaxSignificantStart points for each character later than the first at | 620 // kStartMaxValue/kMaxSignificantStart points for each character later than |
| 621 // which the term begins. No points are earned if the start of the match | 621 // the first at which the term begins. No points are earned if the start of |
| 622 // occurs at or after kMaxSignificantStart. | 622 // the match occurs at or after kMaxSignificantStart. |
| 623 const size_t kMaxSignificantStart = 20; | 623 const size_t kMaxSignificantStart = 50; |
| 624 const int kStartMaxValue = 250; | 624 const int kStartMaxValue = 1000; |
| 625 int start_value = (kMaxSignificantStart - | 625 int start_value = (kMaxSignificantStart - |
| 626 std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue / | 626 std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue / |
| 627 kMaxSignificantStart; | 627 kMaxSignificantStart; |
| 628 | 628 |
| 629 // Score component for how much of the matched string the input terms cover. | 629 // Score component for how much of the matched string the input terms cover. |
| 630 // kCompleteMaxValue points times the fraction of the URL/page title string | 630 // kCompleteMaxValue points times the fraction of the URL/page title string |
| 631 // that was matched. | 631 // that was matched. |
| 632 size_t term_length_total = std::accumulate(matches.begin(), matches.end(), | 632 size_t term_length_total = std::accumulate(matches.begin(), matches.end(), |
| 633 0, AccumulateMatchLength); | 633 0, AccumulateMatchLength); |
| 634 const size_t kMaxSignificantLength = 50; | 634 const size_t kMaxSignificantLength = 50; |
| 635 size_t max_significant_length = | 635 size_t max_significant_length = |
| 636 std::min(max_length, std::max(term_length_total, kMaxSignificantLength)); | 636 std::min(max_length, std::max(term_length_total, kMaxSignificantLength)); |
| 637 const int kCompleteMaxValue = 500; | 637 const int kCompleteMaxValue = 1000; |
| 638 int complete_value = | 638 int complete_value = |
| 639 term_length_total * kCompleteMaxValue / max_significant_length; | 639 term_length_total * kCompleteMaxValue / max_significant_length; |
| 640 | 640 |
| 641 int raw_score = order_value + start_value + complete_value; | 641 const int kOrderRelevance = 1; |
| 642 const int kTermScoreLevel[] = { 1000, 650, 500, 200 }; | 642 const int kStartRelevance = 6; |
| 643 const int kCompleteRelevance = 3; |
| 644 int raw_score = order_value * kOrderRelevance + |
| 645 start_value * kStartRelevance + |
| 646 complete_value * kCompleteRelevance; |
| 647 raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance); |
| 643 | 648 |
| 644 // Scale the sum of the three components above into a single score component | 649 // Scale the raw score into a single score component in the same manner as |
| 645 // on the same scale as that used in ScoredMatchForURL(). | 650 // used in ScoredMatchForURL(). |
| 651 const int kTermScoreLevel[] = { 1000, 750, 500, 200 }; |
| 646 return ScoreForValue(raw_score, kTermScoreLevel); | 652 return ScoreForValue(raw_score, kTermScoreLevel); |
| 647 } | 653 } |
| 648 | 654 |
| 649 void URLIndexPrivateData::ResetSearchTermCache() { | 655 void URLIndexPrivateData::ResetSearchTermCache() { |
| 650 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); | 656 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); |
| 651 iter != search_term_cache_.end(); ++iter) | 657 iter != search_term_cache_.end(); ++iter) |
| 652 iter->second.used_ = false; | 658 iter->second.used_ = false; |
| 653 } | 659 } |
| 654 | 660 |
| 655 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords( | 661 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords( |
| (...skipping 486 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1142 if (iter->has_title()) { | 1148 if (iter->has_title()) { |
| 1143 string16 title(UTF8ToUTF16(iter->title())); | 1149 string16 title(UTF8ToUTF16(iter->title())); |
| 1144 url_row.set_title(title); | 1150 url_row.set_title(title); |
| 1145 } | 1151 } |
| 1146 history_info_map_[history_id] = url_row; | 1152 history_info_map_[history_id] = url_row; |
| 1147 } | 1153 } |
| 1148 return true; | 1154 return true; |
| 1149 } | 1155 } |
| 1150 | 1156 |
| 1151 } // namespace history | 1157 } // namespace history |
| OLD | NEW |