Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(79)

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 9359056: Merge 120346 - Fine-tune HQP Candidate Result Scoring (Closed) Base URL: svn://svn.chromium.org/chrome/branches/1025/src/
Patch Set: Created 8 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/browser/history/in_memory_url_index_unittest.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/url_index_private_data.h" 5 #include "chrome/browser/history/url_index_private_data.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <limits> 10 #include <limits>
(...skipping 25 matching lines...) Expand all
36 CharWordMapEntry; 36 CharWordMapEntry;
37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem 37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem
38 WordIDHistoryMapItem; 38 WordIDHistoryMapItem;
39 typedef imui:: 39 typedef imui::
40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry 40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry
41 WordIDHistoryMapEntry; 41 WordIDHistoryMapEntry;
42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; 42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;
43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry 43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry
44 HistoryInfoMapEntry; 44 HistoryInfoMapEntry;
45 45
46 // The maximum score any candidate result can achieve.
47 const int kMaxTotalScore = 1425;
48
46 // Score ranges used to get a 'base' score for each of the scoring factors 49 // Score ranges used to get a 'base' score for each of the scoring factors
47 // (such as recency of last visit, times visited, times the URL was typed, 50 // (such as recency of last visit, times visited, times the URL was typed,
48 // and the quality of the string match). There is a matching value range for 51 // and the quality of the string match). There is a matching value range for
49 // each of these scores for each factor. 52 // each of these scores for each factor. Note that the top score is greater
50 const int kScoreRank[] = { 1425, 1200, 900, 400 }; 53 // than |kMaxTotalScore|. The score for each candidate will be capped in the
54 // final calculation.
55 const int kScoreRank[] = { 1450, 1200, 900, 400 };
51 56
52 // SearchTermCacheItem --------------------------------------------------------- 57 // SearchTermCacheItem ---------------------------------------------------------
53 58
54 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem( 59 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(
55 const WordIDSet& word_id_set, 60 const WordIDSet& word_id_set,
56 const HistoryIDSet& history_id_set) 61 const HistoryIDSet& history_id_set)
57 : word_id_set_(word_id_set), 62 : word_id_set_(word_id_set),
58 history_id_set_(history_id_set), 63 history_id_set_(history_id_set),
59 used_(true) {} 64 used_(true) {}
60 65
(...skipping 494 matching lines...) Expand 10 before | Expand all | Expand 10 after
555 std::min(match.title_matches.size(), terms.size()) / terms.size(); 560 std::min(match.title_matches.size(), terms.size()) / terms.size();
556 // Arbitrarily pick the best. 561 // Arbitrarily pick the best.
557 // TODO(mrossetti): It might make sense that a term which appears in both the 562 // TODO(mrossetti): It might make sense that a term which appears in both the
558 // URL and the Title should boost the score a bit. 563 // URL and the Title should boost the score a bit.
559 int term_score = std::max(url_score, title_score); 564 int term_score = std::max(url_score, title_score);
560 if (term_score == 0) 565 if (term_score == 0)
561 return match; 566 return match;
562 567
563 // Determine scoring factors for the recency of visit, visit count and typed 568 // Determine scoring factors for the recency of visit, visit count and typed
564 // count attributes of the URLRow. 569 // count attributes of the URLRow.
565 const int kDaysAgoLevel[] = { 0, 10, 20, 30 }; 570 const int kDaysAgoLevel[] = { 1, 10, 20, 30 };
566 int days_ago_value = ScoreForValue((base::Time::Now() - 571 int days_ago_value = ScoreForValue((base::Time::Now() -
567 row.last_visit()).InDays(), kDaysAgoLevel); 572 row.last_visit()).InDays(), kDaysAgoLevel);
568 const int kVisitCountLevel[] = { 30, 10, 5, 3 }; 573 const int kVisitCountLevel[] = { 30, 10, 5, 3 };
569 int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel); 574 int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel);
570 const int kTypedCountLevel[] = { 10, 5, 3, 1 }; 575 const int kTypedCountLevel[] = { 20, 10, 3, 1 };
571 int typed_count_value = ScoreForValue(row.typed_count(), kTypedCountLevel); 576 int typed_count_value = ScoreForValue(row.typed_count(), kTypedCountLevel);
572 577
573 // The final raw score is calculated by: 578 // The final raw score is calculated by:
574 // - accumulating each contributing factor, some of which are added more 579 // - multiplying each factor by a 'relevance'
575 // than once giving them more 'influence' on the final score (currently, 580 // - calculating the average.
576 // visit_count_value is added twice and typed_count_value three times) 581 const int kTermScoreRelevance = 1;
577 // - dropping the lowest scores (|kInsignificantFactors|) 582 const int kDaysAgoRelevance = 2;
578 // - dividing by the remaining significant factors 583 const int kVisitCountRelevance = 3;
579 // This approach allows emphasis on more relevant factors while reducing the 584 const int kTypedCountRelevance = 4;
580 // inordinate impact of low scoring factors. 585 match.raw_score = term_score * kTermScoreRelevance +
581 int factor[] = {term_score, days_ago_value, visit_count_value, 586 days_ago_value * kDaysAgoRelevance +
582 visit_count_value, typed_count_value, typed_count_value, 587 visit_count_value * kVisitCountRelevance +
583 typed_count_value}; 588 typed_count_value * kTypedCountRelevance;
584 const int kInsignificantFactors = 2; 589 match.raw_score /= (kTermScoreRelevance + kDaysAgoRelevance +
585 const int kSignificantFactors = arraysize(factor) - kInsignificantFactors; 590 kVisitCountRelevance + kTypedCountRelevance);
586 std::partial_sort(factor, factor + kSignificantFactors, 591 match.raw_score = std::min(kMaxTotalScore, match.raw_score);
587 factor + arraysize(factor), std::greater<int>());
588 for (int i = 0; i < kSignificantFactors; ++i)
589 match.raw_score += factor[i];
590 match.raw_score /= kSignificantFactors;
591 592
592 return match; 593 return match;
593 } 594 }
594 595
595 int URLIndexPrivateData::ScoreComponentForMatches(const TermMatches& matches, 596 int URLIndexPrivateData::ScoreComponentForMatches(const TermMatches& matches,
596 size_t max_length) { 597 size_t max_length) {
597 // TODO(mrossetti): This is good enough for now but must be fine-tuned.
598 if (matches.empty()) 598 if (matches.empty())
599 return 0; 599 return 0;
600 600
601 // Score component for whether the input terms (if more than one) were found 601 // Score component for whether the input terms (if more than one) were found
602 // in the same order in the match. Start with kOrderMaxValue points divided 602 // in the same order in the match. Start with kOrderMaxValue points divided
603 // equally among (number of terms - 1); then discount each of those terms that 603 // equally among (number of terms - 1); then discount each of those terms that
604 // is out-of-order in the match. 604 // is out-of-order in the match.
605 const int kOrderMaxValue = 250; 605 const int kOrderMaxValue = 1000;
606 int order_value = kOrderMaxValue; 606 int order_value = kOrderMaxValue;
607 if (matches.size() > 1) { 607 if (matches.size() > 1) {
608 int max_possible_out_of_order = matches.size() - 1; 608 int max_possible_out_of_order = matches.size() - 1;
609 int out_of_order = 0; 609 int out_of_order = 0;
610 for (size_t i = 1; i < matches.size(); ++i) { 610 for (size_t i = 1; i < matches.size(); ++i) {
611 if (matches[i - 1].term_num > matches[i].term_num) 611 if (matches[i - 1].term_num > matches[i].term_num)
612 ++out_of_order; 612 ++out_of_order;
613 } 613 }
614 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue / 614 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue /
615 max_possible_out_of_order; 615 max_possible_out_of_order;
616 } 616 }
617 617
618 // Score component for how early in the match string the first search term 618 // Score component for how early in the match string the first search term
619 // appears. Start with kStartMaxValue points and discount by 619 // appears. Start with kStartMaxValue points and discount by
620 // 1/kMaxSignificantStart points for each character later than the first at 620 // kStartMaxValue/kMaxSignificantStart points for each character later than
621 // which the term begins. No points are earned if the start of the match 621 // the first at which the term begins. No points are earned if the start of
622 // occurs at or after kMaxSignificantStart. 622 // the match occurs at or after kMaxSignificantStart.
623 const size_t kMaxSignificantStart = 20; 623 const size_t kMaxSignificantStart = 50;
624 const int kStartMaxValue = 250; 624 const int kStartMaxValue = 1000;
625 int start_value = (kMaxSignificantStart - 625 int start_value = (kMaxSignificantStart -
626 std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue / 626 std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue /
627 kMaxSignificantStart; 627 kMaxSignificantStart;
628 628
629 // Score component for how much of the matched string the input terms cover. 629 // Score component for how much of the matched string the input terms cover.
630 // kCompleteMaxValue points times the fraction of the URL/page title string 630 // kCompleteMaxValue points times the fraction of the URL/page title string
631 // that was matched. 631 // that was matched.
632 size_t term_length_total = std::accumulate(matches.begin(), matches.end(), 632 size_t term_length_total = std::accumulate(matches.begin(), matches.end(),
633 0, AccumulateMatchLength); 633 0, AccumulateMatchLength);
634 const size_t kMaxSignificantLength = 50; 634 const size_t kMaxSignificantLength = 50;
635 size_t max_significant_length = 635 size_t max_significant_length =
636 std::min(max_length, std::max(term_length_total, kMaxSignificantLength)); 636 std::min(max_length, std::max(term_length_total, kMaxSignificantLength));
637 const int kCompleteMaxValue = 500; 637 const int kCompleteMaxValue = 1000;
638 int complete_value = 638 int complete_value =
639 term_length_total * kCompleteMaxValue / max_significant_length; 639 term_length_total * kCompleteMaxValue / max_significant_length;
640 640
641 int raw_score = order_value + start_value + complete_value; 641 const int kOrderRelevance = 1;
642 const int kTermScoreLevel[] = { 1000, 650, 500, 200 }; 642 const int kStartRelevance = 6;
643 const int kCompleteRelevance = 3;
644 int raw_score = order_value * kOrderRelevance +
645 start_value * kStartRelevance +
646 complete_value * kCompleteRelevance;
647 raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance);
643 648
644 // Scale the sum of the three components above into a single score component 649 // Scale the raw score into a single score component in the same manner as
645 // on the same scale as that used in ScoredMatchForURL(). 650 // used in ScoredMatchForURL().
651 const int kTermScoreLevel[] = { 1000, 750, 500, 200 };
646 return ScoreForValue(raw_score, kTermScoreLevel); 652 return ScoreForValue(raw_score, kTermScoreLevel);
647 } 653 }
648 654
649 void URLIndexPrivateData::ResetSearchTermCache() { 655 void URLIndexPrivateData::ResetSearchTermCache() {
650 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); 656 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();
651 iter != search_term_cache_.end(); ++iter) 657 iter != search_term_cache_.end(); ++iter)
652 iter->second.used_ = false; 658 iter->second.used_ = false;
653 } 659 }
654 660
655 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords( 661 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(
(...skipping 486 matching lines...) Expand 10 before | Expand all | Expand 10 after
1142 if (iter->has_title()) { 1148 if (iter->has_title()) {
1143 string16 title(UTF8ToUTF16(iter->title())); 1149 string16 title(UTF8ToUTF16(iter->title()));
1144 url_row.set_title(title); 1150 url_row.set_title(title);
1145 } 1151 }
1146 history_info_map_[history_id] = url_row; 1152 history_info_map_[history_id] = url_row;
1147 } 1153 }
1148 return true; 1154 return true;
1149 } 1155 }
1150 1156
1151 } // namespace history 1157 } // namespace history
OLDNEW
« no previous file with comments | « chrome/browser/history/in_memory_url_index_unittest.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698