OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/omnibox/browser/scored_history_match.h" | 5 #include "components/omnibox/browser/scored_history_match.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <memory> | 8 #include <memory> |
9 | 9 |
10 #include "base/auto_reset.h" | 10 #include "base/auto_reset.h" |
11 #include "base/bind.h" | 11 #include "base/bind.h" |
12 #include "base/i18n/break_iterator.h" | |
12 #include "base/memory/ptr_util.h" | 13 #include "base/memory/ptr_util.h" |
13 #include "base/strings/string16.h" | 14 #include "base/strings/string16.h" |
14 #include "base/strings/utf_string_conversions.h" | 15 #include "base/strings/utf_string_conversions.h" |
15 #include "components/search_engines/search_terms_data.h" | 16 #include "components/search_engines/search_terms_data.h" |
16 #include "components/search_engines/template_url.h" | 17 #include "components/search_engines/template_url.h" |
17 #include "components/search_engines/template_url_service.h" | 18 #include "components/search_engines/template_url_service.h" |
18 #include "components/search_engines/template_url_service_client.h" | 19 #include "components/search_engines/template_url_service_client.h" |
19 #include "testing/gmock/include/gmock/gmock.h" | 20 #include "testing/gmock/include/gmock/gmock.h" |
20 #include "testing/gtest/include/gtest/gtest.h" | 21 #include "testing/gtest/include/gtest/gtest.h" |
21 | 22 |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
112 const base::string16& title) { | 113 const base::string16& title) { |
113 // Make an empty match and simply populate the fields we need in order | 114 // Make an empty match and simply populate the fields we need in order |
114 // to call GetTopicalityScore(). | 115 // to call GetTopicalityScore(). |
115 ScoredHistoryMatch scored_match; | 116 ScoredHistoryMatch scored_match; |
116 scored_match.url_matches = MatchTermInString(term, url, 0); | 117 scored_match.url_matches = MatchTermInString(term, url, 0); |
117 scored_match.title_matches = MatchTermInString(term, title, 0); | 118 scored_match.title_matches = MatchTermInString(term, title, 0); |
118 scored_match.topicality_threshold_ = -1; | 119 scored_match.topicality_threshold_ = -1; |
119 RowWordStarts word_starts; | 120 RowWordStarts word_starts; |
120 String16SetFromString16(url, &word_starts.url_word_starts_); | 121 String16SetFromString16(url, &word_starts.url_word_starts_); |
121 String16SetFromString16(title, &word_starts.title_word_starts_); | 122 String16SetFromString16(title, &word_starts.title_word_starts_); |
122 WordStarts one_word_no_offset(1, 0u); | 123 WordStarts term_word_starts(1, 0u); |
123 return scored_match.GetTopicalityScore(1, url, one_word_no_offset, | 124 base::i18n::BreakIterator iter(term, base::i18n::BreakIterator::BREAK_WORD); |
125 if (iter.Init()) { | |
126 // Find the first word start. | |
127 while (iter.Advance() && !iter.IsWord()) {} | |
128 term_word_starts[0] = iter.prev(); | |
129 } | |
130 return scored_match.GetTopicalityScore(1, url, term_word_starts, | |
124 word_starts); | 131 word_starts); |
125 } | 132 } |
126 | 133 |
127 TEST_F(ScoredHistoryMatchTest, Scoring) { | 134 TEST_F(ScoredHistoryMatchTest, Scoring) { |
128 // We use NowFromSystemTime() because MakeURLRow uses the same function | 135 // We use NowFromSystemTime() because MakeURLRow uses the same function |
129 // to calculate last visit time when building a row. | 136 // to calculate last visit time when building a row. |
130 base::Time now = base::Time::NowFromSystemTime(); | 137 base::Time now = base::Time::NowFromSystemTime(); |
131 | 138 |
132 history::URLRow row_a(MakeURLRow("http://fedcba", "abcd bcd", 3, 30, 1)); | 139 history::URLRow row_a(MakeURLRow("http://fedcba", "abcd bcd", 3, 30, 1)); |
133 RowWordStarts word_starts_a; | 140 RowWordStarts word_starts_a; |
(...skipping 409 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
543 } | 550 } |
544 | 551 |
545 // This function only tests scoring of single terms that match exactly | 552 // This function only tests scoring of single terms that match exactly |
546 // once somewhere in the URL or title. | 553 // once somewhere in the URL or title. |
547 TEST_F(ScoredHistoryMatchTest, GetTopicalityScore) { | 554 TEST_F(ScoredHistoryMatchTest, GetTopicalityScore) { |
548 base::string16 url = ASCIIToUTF16( | 555 base::string16 url = ASCIIToUTF16( |
549 "http://abc.def.com/path1/path2?" | 556 "http://abc.def.com/path1/path2?" |
550 "arg1=val1&arg2=val2#hash_component"); | 557 "arg1=val1&arg2=val2#hash_component"); |
551 base::string16 title = ASCIIToUTF16("here is a title"); | 558 base::string16 title = ASCIIToUTF16("here is a title"); |
552 const float hostname_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 559 const float hostname_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
553 ASCIIToUTF16("abc"), url, title); | 560 ASCIIToUTF16("abc"), url, title); |
Peter Kasting
2016/10/17 23:34:37
Nit: Define a lambda here (not sure how git cl for
Mark P
2016/10/18 22:57:29
Good idea. I find my aging brain doesn't think of
| |
554 const float hostname_mid_word_score = | 561 const float hostname_mid_word_score = |
555 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("bc"), url, | 562 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("bc"), url, |
556 title); | 563 title); |
564 const float hostname_score_preceeding_punctuation = | |
565 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("://abc"), url, | |
566 title); | |
557 const float domain_name_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 567 const float domain_name_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
558 ASCIIToUTF16("def"), url, title); | 568 ASCIIToUTF16("def"), url, title); |
559 const float domain_name_mid_word_score = | 569 const float domain_name_mid_word_score = |
560 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("ef"), url, | 570 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("ef"), url, |
561 title); | 571 title); |
572 const float domain_name_score_preceeding_dot = | |
573 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16(".def"), url, | |
574 title); | |
562 const float tld_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 575 const float tld_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
563 ASCIIToUTF16("com"), url, title); | 576 ASCIIToUTF16("com"), url, title); |
564 const float tld_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 577 const float tld_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
565 ASCIIToUTF16("om"), url, title); | 578 ASCIIToUTF16("om"), url, title); |
579 const float tld_score_preceeding_dot = | |
580 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16(".com"), url, | |
581 title); | |
566 const float path_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 582 const float path_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
567 ASCIIToUTF16("path1"), url, title); | 583 ASCIIToUTF16("path1"), url, title); |
568 const float path_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 584 const float path_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
569 ASCIIToUTF16("ath1"), url, title); | 585 ASCIIToUTF16("ath1"), url, title); |
586 const float path_score_preceeding_slash = | |
587 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("/path1"), url, | |
588 title); | |
570 const float arg_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 589 const float arg_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
571 ASCIIToUTF16("arg2"), url, title); | 590 ASCIIToUTF16("arg1"), url, title); |
572 const float arg_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 591 const float arg_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
573 ASCIIToUTF16("rg2"), url, title); | 592 ASCIIToUTF16("rg1"), url, title); |
593 const float arg_score_preceeding_question_mark = | |
594 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("?arg1"), url, | |
595 title); | |
574 const float protocol_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 596 const float protocol_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
575 ASCIIToUTF16("htt"), url, title); | 597 ASCIIToUTF16("htt"), url, title); |
576 const float protocol_mid_word_score = | 598 const float protocol_mid_word_score = |
577 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("tt"), url, | 599 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("tt"), url, |
578 title); | 600 title); |
579 const float title_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 601 const float title_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
580 ASCIIToUTF16("her"), url, title); | 602 ASCIIToUTF16("her"), url, title); |
581 const float title_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 603 const float title_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
582 ASCIIToUTF16("er"), url, title); | 604 ASCIIToUTF16("er"), url, title); |
583 // Verify hostname and domain name > path > arg. | 605 // Verify hostname and domain name > path > arg. |
584 EXPECT_GT(hostname_score, path_score); | 606 EXPECT_GT(hostname_score, path_score); |
585 EXPECT_GT(domain_name_score, path_score); | 607 EXPECT_GT(domain_name_score, path_score); |
586 EXPECT_GT(path_score, arg_score); | 608 EXPECT_GT(path_score, arg_score); |
609 // Verify leading punctuation doesn't confuse scoring. | |
610 EXPECT_EQ(hostname_score, hostname_score_preceeding_punctuation); | |
611 EXPECT_EQ(domain_name_score, domain_name_score_preceeding_dot); | |
612 EXPECT_EQ(tld_score, tld_score_preceeding_dot); | |
613 EXPECT_EQ(path_score, path_score_preceeding_slash); | |
614 EXPECT_EQ(arg_score, arg_score_preceeding_question_mark); | |
587 // Verify that domain name > path and domain name > arg for non-word | 615 // Verify that domain name > path and domain name > arg for non-word |
588 // boundaries. | 616 // boundaries. |
589 EXPECT_GT(hostname_mid_word_score, path_mid_word_score); | 617 EXPECT_GT(hostname_mid_word_score, path_mid_word_score); |
590 EXPECT_GT(domain_name_mid_word_score, path_mid_word_score); | 618 EXPECT_GT(domain_name_mid_word_score, path_mid_word_score); |
591 EXPECT_GT(domain_name_mid_word_score, arg_mid_word_score); | 619 EXPECT_GT(domain_name_mid_word_score, arg_mid_word_score); |
592 EXPECT_GT(hostname_mid_word_score, arg_mid_word_score); | 620 EXPECT_GT(hostname_mid_word_score, arg_mid_word_score); |
593 // Also verify that the matches at non-word-boundaries all score | 621 // Also verify that the matches at non-word-boundaries all score |
594 // worse than the matches at word boundaries. These three sets suffice. | 622 // worse than the matches at word boundaries. These three sets suffice. |
595 EXPECT_GT(arg_score, hostname_mid_word_score); | 623 EXPECT_GT(arg_score, hostname_mid_word_score); |
596 EXPECT_GT(arg_score, domain_name_mid_word_score); | 624 EXPECT_GT(arg_score, domain_name_mid_word_score); |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
654 | 682 |
655 EXPECT_TRUE( | 683 EXPECT_TRUE( |
656 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets)); | 684 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets)); |
657 EXPECT_THAT(hqp_buckets, ElementsAre(Pair(0.0, 400), Pair(1.5, 600), | 685 EXPECT_THAT(hqp_buckets, ElementsAre(Pair(0.0, 400), Pair(1.5, 600), |
658 Pair(12.0, 1300), Pair(20.0, 1399))); | 686 Pair(12.0, 1300), Pair(20.0, 1399))); |
659 // invalid string. | 687 // invalid string. |
660 buckets_str = "0.0,400,1.5,600"; | 688 buckets_str = "0.0,400,1.5,600"; |
661 EXPECT_FALSE( | 689 EXPECT_FALSE( |
662 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets)); | 690 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets)); |
663 } | 691 } |
OLD | NEW |