chrome/browser/history/url_index_private_data.cc - Issue 9359056: Merge 120346 - Fine-tune HQP Candidate Result Scoring

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 9359056: Merge 120346 - Fine-tune HQP Candidate Result Scoring (Closed) Base URL: svn://svn.chromium.org/chrome/branches/1025/src/

Patch Set: Created 8 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/history/url_index_private_data.h"	5 #include "chrome/browser/history/url_index_private_data.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <functional>	8 #include <functional>

9 #include <iterator>	9 #include <iterator>

10 #include <limits>	10 #include <limits>

(...skipping 25 matching lines...) Expand all Loading...
36 CharWordMapEntry;	36 CharWordMapEntry;

37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem	37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem

38 WordIDHistoryMapItem;	38 WordIDHistoryMapItem;

39 typedef imui::	39 typedef imui::

40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry	40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry

41 WordIDHistoryMapEntry;	41 WordIDHistoryMapEntry;

42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;	42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;

43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry	43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry

44 HistoryInfoMapEntry;	44 HistoryInfoMapEntry;

45	45

	46 // The maximum score any candidate result can achieve.

	47 const int kMaxTotalScore = 1425;

	48

46 // Score ranges used to get a 'base' score for each of the scoring factors	49 // Score ranges used to get a 'base' score for each of the scoring factors

47 // (such as recency of last visit, times visited, times the URL was typed,	50 // (such as recency of last visit, times visited, times the URL was typed,

48 // and the quality of the string match). There is a matching value range for	51 // and the quality of the string match). There is a matching value range for

49 // each of these scores for each factor.	52 // each of these scores for each factor. Note that the top score is greater

50 const int kScoreRank[] = { 1425, 1200, 900, 400 };	53 // than \|kMaxTotalScore\|. The score for each candidate will be capped in the

	54 // final calculation.

	55 const int kScoreRank[] = { 1450, 1200, 900, 400 };

51	56

52 // SearchTermCacheItem ---------------------------------------------------------	57 // SearchTermCacheItem ---------------------------------------------------------

53	58

54 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(	59 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(

55 const WordIDSet& word_id_set,	60 const WordIDSet& word_id_set,

56 const HistoryIDSet& history_id_set)	61 const HistoryIDSet& history_id_set)

57 : word_id_set_(word_id_set),	62 : word_id_set_(word_id_set),

58 history_id_set_(history_id_set),	63 history_id_set_(history_id_set),

59 used_(true) {}	64 used_(true) {}

60	65

(...skipping 494 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
555 std::min(match.title_matches.size(), terms.size()) / terms.size();	560 std::min(match.title_matches.size(), terms.size()) / terms.size();

556 // Arbitrarily pick the best.	561 // Arbitrarily pick the best.

557 // TODO(mrossetti): It might make sense that a term which appears in both the	562 // TODO(mrossetti): It might make sense that a term which appears in both the

558 // URL and the Title should boost the score a bit.	563 // URL and the Title should boost the score a bit.

559 int term_score = std::max(url_score, title_score);	564 int term_score = std::max(url_score, title_score);

560 if (term_score == 0)	565 if (term_score == 0)

561 return match;	566 return match;

562	567

563 // Determine scoring factors for the recency of visit, visit count and typed	568 // Determine scoring factors for the recency of visit, visit count and typed

564 // count attributes of the URLRow.	569 // count attributes of the URLRow.

565 const int kDaysAgoLevel[] = { 0, 10, 20, 30 };	570 const int kDaysAgoLevel[] = { 1, 10, 20, 30 };

566 int days_ago_value = ScoreForValue((base::Time::Now() -	571 int days_ago_value = ScoreForValue((base::Time::Now() -

567 row.last_visit()).InDays(), kDaysAgoLevel);	572 row.last_visit()).InDays(), kDaysAgoLevel);

568 const int kVisitCountLevel[] = { 30, 10, 5, 3 };	573 const int kVisitCountLevel[] = { 30, 10, 5, 3 };

569 int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel);	574 int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel);

570 const int kTypedCountLevel[] = { 10, 5, 3, 1 };	575 const int kTypedCountLevel[] = { 20, 10, 3, 1 };

571 int typed_count_value = ScoreForValue(row.typed_count(), kTypedCountLevel);	576 int typed_count_value = ScoreForValue(row.typed_count(), kTypedCountLevel);

572	577

573 // The final raw score is calculated by:	578 // The final raw score is calculated by:

574 // - accumulating each contributing factor, some of which are added more	579 // - multiplying each factor by a 'relevance'

575 // than once giving them more 'influence' on the final score (currently,	580 // - calculating the average.

576 // visit_count_value is added twice and typed_count_value three times)	581 const int kTermScoreRelevance = 1;

577 // - dropping the lowest scores (\|kInsignificantFactors\|)	582 const int kDaysAgoRelevance = 2;

578 // - dividing by the remaining significant factors	583 const int kVisitCountRelevance = 3;

579 // This approach allows emphasis on more relevant factors while reducing the	584 const int kTypedCountRelevance = 4;

580 // inordinate impact of low scoring factors.	585 match.raw_score = term_score * kTermScoreRelevance +

581 int factor[] = {term_score, days_ago_value, visit_count_value,	586 days_ago_value * kDaysAgoRelevance +

582 visit_count_value, typed_count_value, typed_count_value,	587 visit_count_value * kVisitCountRelevance +

583 typed_count_value};	588 typed_count_value * kTypedCountRelevance;

584 const int kInsignificantFactors = 2;	589 match.raw_score /= (kTermScoreRelevance + kDaysAgoRelevance +

585 const int kSignificantFactors = arraysize(factor) - kInsignificantFactors;	590 kVisitCountRelevance + kTypedCountRelevance);

586 std::partial_sort(factor, factor + kSignificantFactors,	591 match.raw_score = std::min(kMaxTotalScore, match.raw_score);

587 factor + arraysize(factor), std::greater<int>());

588 for (int i = 0; i < kSignificantFactors; ++i)

589 match.raw_score += factor[i];

590 match.raw_score /= kSignificantFactors;

591	592

592 return match;	593 return match;

593 }	594 }

594	595

595 int URLIndexPrivateData::ScoreComponentForMatches(const TermMatches& matches,	596 int URLIndexPrivateData::ScoreComponentForMatches(const TermMatches& matches,

596 size_t max_length) {	597 size_t max_length) {

597 // TODO(mrossetti): This is good enough for now but must be fine-tuned.

598 if (matches.empty())	598 if (matches.empty())

599 return 0;	599 return 0;

600	600

601 // Score component for whether the input terms (if more than one) were found	601 // Score component for whether the input terms (if more than one) were found

602 // in the same order in the match. Start with kOrderMaxValue points divided	602 // in the same order in the match. Start with kOrderMaxValue points divided

603 // equally among (number of terms - 1); then discount each of those terms that	603 // equally among (number of terms - 1); then discount each of those terms that

604 // is out-of-order in the match.	604 // is out-of-order in the match.

605 const int kOrderMaxValue = 250;	605 const int kOrderMaxValue = 1000;

606 int order_value = kOrderMaxValue;	606 int order_value = kOrderMaxValue;

607 if (matches.size() > 1) {	607 if (matches.size() > 1) {

608 int max_possible_out_of_order = matches.size() - 1;	608 int max_possible_out_of_order = matches.size() - 1;

609 int out_of_order = 0;	609 int out_of_order = 0;

610 for (size_t i = 1; i < matches.size(); ++i) {	610 for (size_t i = 1; i < matches.size(); ++i) {

611 if (matches[i - 1].term_num > matches[i].term_num)	611 if (matches[i - 1].term_num > matches[i].term_num)

612 ++out_of_order;	612 ++out_of_order;

613 }	613 }

614 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue /	614 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue /

615 max_possible_out_of_order;	615 max_possible_out_of_order;

616 }	616 }

617	617

618 // Score component for how early in the match string the first search term	618 // Score component for how early in the match string the first search term

619 // appears. Start with kStartMaxValue points and discount by	619 // appears. Start with kStartMaxValue points and discount by

620 // 1/kMaxSignificantStart points for each character later than the first at	620 // kStartMaxValue/kMaxSignificantStart points for each character later than

621 // which the term begins. No points are earned if the start of the match	621 // the first at which the term begins. No points are earned if the start of

622 // occurs at or after kMaxSignificantStart.	622 // the match occurs at or after kMaxSignificantStart.

623 const size_t kMaxSignificantStart = 20;	623 const size_t kMaxSignificantStart = 50;

624 const int kStartMaxValue = 250;	624 const int kStartMaxValue = 1000;

625 int start_value = (kMaxSignificantStart -	625 int start_value = (kMaxSignificantStart -

626 std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue /	626 std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue /

627 kMaxSignificantStart;	627 kMaxSignificantStart;

628	628

629 // Score component for how much of the matched string the input terms cover.	629 // Score component for how much of the matched string the input terms cover.

630 // kCompleteMaxValue points times the fraction of the URL/page title string	630 // kCompleteMaxValue points times the fraction of the URL/page title string

631 // that was matched.	631 // that was matched.

632 size_t term_length_total = std::accumulate(matches.begin(), matches.end(),	632 size_t term_length_total = std::accumulate(matches.begin(), matches.end(),

633 0, AccumulateMatchLength);	633 0, AccumulateMatchLength);

634 const size_t kMaxSignificantLength = 50;	634 const size_t kMaxSignificantLength = 50;

635 size_t max_significant_length =	635 size_t max_significant_length =

636 std::min(max_length, std::max(term_length_total, kMaxSignificantLength));	636 std::min(max_length, std::max(term_length_total, kMaxSignificantLength));

637 const int kCompleteMaxValue = 500;	637 const int kCompleteMaxValue = 1000;

638 int complete_value =	638 int complete_value =

639 term_length_total * kCompleteMaxValue / max_significant_length;	639 term_length_total * kCompleteMaxValue / max_significant_length;

640	640

641 int raw_score = order_value + start_value + complete_value;	641 const int kOrderRelevance = 1;

642 const int kTermScoreLevel[] = { 1000, 650, 500, 200 };	642 const int kStartRelevance = 6;

	643 const int kCompleteRelevance = 3;

	644 int raw_score = order_value * kOrderRelevance +

	645 start_value * kStartRelevance +

	646 complete_value * kCompleteRelevance;

	647 raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance);

643	648

644 // Scale the sum of the three components above into a single score component	649 // Scale the raw score into a single score component in the same manner as

645 // on the same scale as that used in ScoredMatchForURL().	650 // used in ScoredMatchForURL().

	651 const int kTermScoreLevel[] = { 1000, 750, 500, 200 };

646 return ScoreForValue(raw_score, kTermScoreLevel);	652 return ScoreForValue(raw_score, kTermScoreLevel);

647 }	653 }

648	654

649 void URLIndexPrivateData::ResetSearchTermCache() {	655 void URLIndexPrivateData::ResetSearchTermCache() {

650 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();	656 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();

651 iter != search_term_cache_.end(); ++iter)	657 iter != search_term_cache_.end(); ++iter)

652 iter->second.used_ = false;	658 iter->second.used_ = false;

653 }	659 }

654	660

655 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(	661 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(

(...skipping 486 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1142 if (iter->has_title()) {	1148 if (iter->has_title()) {

1143 string16 title(UTF8ToUTF16(iter->title()));	1149 string16 title(UTF8ToUTF16(iter->title()));

1144 url_row.set_title(title);	1150 url_row.set_title(title);

1145 }	1151 }

1146 history_info_map_[history_id] = url_row;	1152 history_info_map_[history_id] = url_row;

1147 }	1153 }

1148 return true;	1154 return true;

1149 }	1155 }

1150	1156

1151 } // namespace history	1157 } // namespace history

OLD	NEW

« no previous file with comments | « chrome/browser/history/in_memory_url_index_unittest.cc ('k') | no next file » | no next file with comments »