OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/url_index_private_data.h" | 5 #include "chrome/browser/history/url_index_private_data.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <limits> | 10 #include <limits> |
(...skipping 25 matching lines...) Expand all Loading... |
36 CharWordMapEntry; | 36 CharWordMapEntry; |
37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem | 37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem |
38 WordIDHistoryMapItem; | 38 WordIDHistoryMapItem; |
39 typedef imui:: | 39 typedef imui:: |
40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry | 40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry |
41 WordIDHistoryMapEntry; | 41 WordIDHistoryMapEntry; |
42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; | 42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; |
43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry | 43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry |
44 HistoryInfoMapEntry; | 44 HistoryInfoMapEntry; |
45 | 45 |
| 46 // The maximum score any candidate result can achieve. |
| 47 const int kMaxTotalScore = 1425; |
| 48 |
46 // Score ranges used to get a 'base' score for each of the scoring factors | 49 // Score ranges used to get a 'base' score for each of the scoring factors |
47 // (such as recency of last visit, times visited, times the URL was typed, | 50 // (such as recency of last visit, times visited, times the URL was typed, |
48 // and the quality of the string match). There is a matching value range for | 51 // and the quality of the string match). There is a matching value range for |
49 // each of these scores for each factor. | 52 // each of these scores for each factor. Note that the top score is greater |
50 const int kScoreRank[] = { 1425, 1200, 900, 400 }; | 53 // than |kMaxTotalScore|. The score for each candidate will be capped in the |
| 54 // final calculation. |
| 55 const int kScoreRank[] = { 1450, 1200, 900, 400 }; |
51 | 56 |
52 // SearchTermCacheItem --------------------------------------------------------- | 57 // SearchTermCacheItem --------------------------------------------------------- |
53 | 58 |
54 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem( | 59 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem( |
55 const WordIDSet& word_id_set, | 60 const WordIDSet& word_id_set, |
56 const HistoryIDSet& history_id_set) | 61 const HistoryIDSet& history_id_set) |
57 : word_id_set_(word_id_set), | 62 : word_id_set_(word_id_set), |
58 history_id_set_(history_id_set), | 63 history_id_set_(history_id_set), |
59 used_(true) {} | 64 used_(true) {} |
60 | 65 |
(...skipping 494 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
555 std::min(match.title_matches.size(), terms.size()) / terms.size(); | 560 std::min(match.title_matches.size(), terms.size()) / terms.size(); |
556 // Arbitrarily pick the best. | 561 // Arbitrarily pick the best. |
557 // TODO(mrossetti): It might make sense that a term which appears in both the | 562 // TODO(mrossetti): It might make sense that a term which appears in both the |
558 // URL and the Title should boost the score a bit. | 563 // URL and the Title should boost the score a bit. |
559 int term_score = std::max(url_score, title_score); | 564 int term_score = std::max(url_score, title_score); |
560 if (term_score == 0) | 565 if (term_score == 0) |
561 return match; | 566 return match; |
562 | 567 |
563 // Determine scoring factors for the recency of visit, visit count and typed | 568 // Determine scoring factors for the recency of visit, visit count and typed |
564 // count attributes of the URLRow. | 569 // count attributes of the URLRow. |
565 const int kDaysAgoLevel[] = { 0, 10, 20, 30 }; | 570 const int kDaysAgoLevel[] = { 1, 10, 20, 30 }; |
566 int days_ago_value = ScoreForValue((base::Time::Now() - | 571 int days_ago_value = ScoreForValue((base::Time::Now() - |
567 row.last_visit()).InDays(), kDaysAgoLevel); | 572 row.last_visit()).InDays(), kDaysAgoLevel); |
568 const int kVisitCountLevel[] = { 30, 10, 5, 3 }; | 573 const int kVisitCountLevel[] = { 30, 10, 5, 3 }; |
569 int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel); | 574 int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel); |
570 const int kTypedCountLevel[] = { 10, 5, 3, 1 }; | 575 const int kTypedCountLevel[] = { 20, 10, 3, 1 }; |
571 int typed_count_value = ScoreForValue(row.typed_count(), kTypedCountLevel); | 576 int typed_count_value = ScoreForValue(row.typed_count(), kTypedCountLevel); |
572 | 577 |
573 // The final raw score is calculated by: | 578 // The final raw score is calculated by: |
574 // - accumulating each contributing factor, some of which are added more | 579 // - multiplying each factor by a 'relevance' |
575 // than once giving them more 'influence' on the final score (currently, | 580 // - calculating the average. |
576 // visit_count_value is added twice and typed_count_value three times) | 581 const int kTermScoreRelevance = 1; |
577 // - dropping the lowest scores (|kInsignificantFactors|) | 582 const int kDaysAgoRelevance = 2; |
578 // - dividing by the remaining significant factors | 583 const int kVisitCountRelevance = 3; |
579 // This approach allows emphasis on more relevant factors while reducing the | 584 const int kTypedCountRelevance = 4; |
580 // inordinate impact of low scoring factors. | 585 match.raw_score = term_score * kTermScoreRelevance + |
581 int factor[] = {term_score, days_ago_value, visit_count_value, | 586 days_ago_value * kDaysAgoRelevance + |
582 visit_count_value, typed_count_value, typed_count_value, | 587 visit_count_value * kVisitCountRelevance + |
583 typed_count_value}; | 588 typed_count_value * kTypedCountRelevance; |
584 const int kInsignificantFactors = 2; | 589 match.raw_score /= (kTermScoreRelevance + kDaysAgoRelevance + |
585 const int kSignificantFactors = arraysize(factor) - kInsignificantFactors; | 590 kVisitCountRelevance + kTypedCountRelevance); |
586 std::partial_sort(factor, factor + kSignificantFactors, | 591 match.raw_score = std::min(kMaxTotalScore, match.raw_score); |
587 factor + arraysize(factor), std::greater<int>()); | |
588 for (int i = 0; i < kSignificantFactors; ++i) | |
589 match.raw_score += factor[i]; | |
590 match.raw_score /= kSignificantFactors; | |
591 | 592 |
592 return match; | 593 return match; |
593 } | 594 } |
594 | 595 |
595 int URLIndexPrivateData::ScoreComponentForMatches(const TermMatches& matches, | 596 int URLIndexPrivateData::ScoreComponentForMatches(const TermMatches& matches, |
596 size_t max_length) { | 597 size_t max_length) { |
597 // TODO(mrossetti): This is good enough for now but must be fine-tuned. | |
598 if (matches.empty()) | 598 if (matches.empty()) |
599 return 0; | 599 return 0; |
600 | 600 |
601 // Score component for whether the input terms (if more than one) were found | 601 // Score component for whether the input terms (if more than one) were found |
602 // in the same order in the match. Start with kOrderMaxValue points divided | 602 // in the same order in the match. Start with kOrderMaxValue points divided |
603 // equally among (number of terms - 1); then discount each of those terms that | 603 // equally among (number of terms - 1); then discount each of those terms that |
604 // is out-of-order in the match. | 604 // is out-of-order in the match. |
605 const int kOrderMaxValue = 250; | 605 const int kOrderMaxValue = 1000; |
606 int order_value = kOrderMaxValue; | 606 int order_value = kOrderMaxValue; |
607 if (matches.size() > 1) { | 607 if (matches.size() > 1) { |
608 int max_possible_out_of_order = matches.size() - 1; | 608 int max_possible_out_of_order = matches.size() - 1; |
609 int out_of_order = 0; | 609 int out_of_order = 0; |
610 for (size_t i = 1; i < matches.size(); ++i) { | 610 for (size_t i = 1; i < matches.size(); ++i) { |
611 if (matches[i - 1].term_num > matches[i].term_num) | 611 if (matches[i - 1].term_num > matches[i].term_num) |
612 ++out_of_order; | 612 ++out_of_order; |
613 } | 613 } |
614 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue / | 614 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue / |
615 max_possible_out_of_order; | 615 max_possible_out_of_order; |
616 } | 616 } |
617 | 617 |
618 // Score component for how early in the match string the first search term | 618 // Score component for how early in the match string the first search term |
619 // appears. Start with kStartMaxValue points and discount by | 619 // appears. Start with kStartMaxValue points and discount by |
620 // 1/kMaxSignificantStart points for each character later than the first at | 620 // kStartMaxValue/kMaxSignificantStart points for each character later than |
621 // which the term begins. No points are earned if the start of the match | 621 // the first at which the term begins. No points are earned if the start of |
622 // occurs at or after kMaxSignificantStart. | 622 // the match occurs at or after kMaxSignificantStart. |
623 const size_t kMaxSignificantStart = 20; | 623 const size_t kMaxSignificantStart = 50; |
624 const int kStartMaxValue = 250; | 624 const int kStartMaxValue = 1000; |
625 int start_value = (kMaxSignificantStart - | 625 int start_value = (kMaxSignificantStart - |
626 std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue / | 626 std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue / |
627 kMaxSignificantStart; | 627 kMaxSignificantStart; |
628 | 628 |
629 // Score component for how much of the matched string the input terms cover. | 629 // Score component for how much of the matched string the input terms cover. |
630 // kCompleteMaxValue points times the fraction of the URL/page title string | 630 // kCompleteMaxValue points times the fraction of the URL/page title string |
631 // that was matched. | 631 // that was matched. |
632 size_t term_length_total = std::accumulate(matches.begin(), matches.end(), | 632 size_t term_length_total = std::accumulate(matches.begin(), matches.end(), |
633 0, AccumulateMatchLength); | 633 0, AccumulateMatchLength); |
634 const size_t kMaxSignificantLength = 50; | 634 const size_t kMaxSignificantLength = 50; |
635 size_t max_significant_length = | 635 size_t max_significant_length = |
636 std::min(max_length, std::max(term_length_total, kMaxSignificantLength)); | 636 std::min(max_length, std::max(term_length_total, kMaxSignificantLength)); |
637 const int kCompleteMaxValue = 500; | 637 const int kCompleteMaxValue = 1000; |
638 int complete_value = | 638 int complete_value = |
639 term_length_total * kCompleteMaxValue / max_significant_length; | 639 term_length_total * kCompleteMaxValue / max_significant_length; |
640 | 640 |
641 int raw_score = order_value + start_value + complete_value; | 641 const int kOrderRelevance = 1; |
642 const int kTermScoreLevel[] = { 1000, 650, 500, 200 }; | 642 const int kStartRelevance = 6; |
| 643 const int kCompleteRelevance = 3; |
| 644 int raw_score = order_value * kOrderRelevance + |
| 645 start_value * kStartRelevance + |
| 646 complete_value * kCompleteRelevance; |
| 647 raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance); |
643 | 648 |
644 // Scale the sum of the three components above into a single score component | 649 // Scale the raw score into a single score component in the same manner as |
645 // on the same scale as that used in ScoredMatchForURL(). | 650 // used in ScoredMatchForURL(). |
| 651 const int kTermScoreLevel[] = { 1000, 750, 500, 200 }; |
646 return ScoreForValue(raw_score, kTermScoreLevel); | 652 return ScoreForValue(raw_score, kTermScoreLevel); |
647 } | 653 } |
648 | 654 |
649 void URLIndexPrivateData::ResetSearchTermCache() { | 655 void URLIndexPrivateData::ResetSearchTermCache() { |
650 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); | 656 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); |
651 iter != search_term_cache_.end(); ++iter) | 657 iter != search_term_cache_.end(); ++iter) |
652 iter->second.used_ = false; | 658 iter->second.used_ = false; |
653 } | 659 } |
654 | 660 |
655 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords( | 661 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords( |
(...skipping 486 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1142 if (iter->has_title()) { | 1148 if (iter->has_title()) { |
1143 string16 title(UTF8ToUTF16(iter->title())); | 1149 string16 title(UTF8ToUTF16(iter->title())); |
1144 url_row.set_title(title); | 1150 url_row.set_title(title); |
1145 } | 1151 } |
1146 history_info_map_[history_id] = url_row; | 1152 history_info_map_[history_id] = url_row; |
1147 } | 1153 } |
1148 return true; | 1154 return true; |
1149 } | 1155 } |
1150 | 1156 |
1151 } // namespace history | 1157 } // namespace history |
OLD | NEW |