Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/omnibox/browser/scored_history_match.h" | 5 #include "components/omnibox/browser/scored_history_match.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <memory> | 8 #include <memory> |
| 9 | 9 |
| 10 #include "base/auto_reset.h" | 10 #include "base/auto_reset.h" |
| 11 #include "base/bind.h" | 11 #include "base/bind.h" |
| 12 #include "base/i18n/break_iterator.h" | |
| 12 #include "base/memory/ptr_util.h" | 13 #include "base/memory/ptr_util.h" |
| 13 #include "base/strings/string16.h" | 14 #include "base/strings/string16.h" |
| 14 #include "base/strings/utf_string_conversions.h" | 15 #include "base/strings/utf_string_conversions.h" |
| 15 #include "components/search_engines/search_terms_data.h" | 16 #include "components/search_engines/search_terms_data.h" |
| 16 #include "components/search_engines/template_url.h" | 17 #include "components/search_engines/template_url.h" |
| 17 #include "components/search_engines/template_url_service.h" | 18 #include "components/search_engines/template_url_service.h" |
| 18 #include "components/search_engines/template_url_service_client.h" | 19 #include "components/search_engines/template_url_service_client.h" |
| 19 #include "testing/gmock/include/gmock/gmock.h" | 20 #include "testing/gmock/include/gmock/gmock.h" |
| 20 #include "testing/gtest/include/gtest/gtest.h" | 21 #include "testing/gtest/include/gtest/gtest.h" |
| 21 | 22 |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 112 const base::string16& title) { | 113 const base::string16& title) { |
| 113 // Make an empty match and simply populate the fields we need in order | 114 // Make an empty match and simply populate the fields we need in order |
| 114 // to call GetTopicalityScore(). | 115 // to call GetTopicalityScore(). |
| 115 ScoredHistoryMatch scored_match; | 116 ScoredHistoryMatch scored_match; |
| 116 scored_match.url_matches = MatchTermInString(term, url, 0); | 117 scored_match.url_matches = MatchTermInString(term, url, 0); |
| 117 scored_match.title_matches = MatchTermInString(term, title, 0); | 118 scored_match.title_matches = MatchTermInString(term, title, 0); |
| 118 scored_match.topicality_threshold_ = -1; | 119 scored_match.topicality_threshold_ = -1; |
| 119 RowWordStarts word_starts; | 120 RowWordStarts word_starts; |
| 120 String16SetFromString16(url, &word_starts.url_word_starts_); | 121 String16SetFromString16(url, &word_starts.url_word_starts_); |
| 121 String16SetFromString16(title, &word_starts.title_word_starts_); | 122 String16SetFromString16(title, &word_starts.title_word_starts_); |
| 122 WordStarts one_word_no_offset(1, 0u); | 123 WordStarts term_word_starts(1, 0u); |
| 123 return scored_match.GetTopicalityScore(1, url, one_word_no_offset, | 124 base::i18n::BreakIterator iter(term, base::i18n::BreakIterator::BREAK_WORD); |
| 125 if (iter.Init()) { | |
| 126 // Find the first word start. | |
| 127 while (iter.Advance() && !iter.IsWord()) {} | |
| 128 term_word_starts[0] = iter.prev(); | |
| 129 } | |
| 130 return scored_match.GetTopicalityScore(1, url, term_word_starts, | |
| 124 word_starts); | 131 word_starts); |
| 125 } | 132 } |
| 126 | 133 |
| 127 TEST_F(ScoredHistoryMatchTest, Scoring) { | 134 TEST_F(ScoredHistoryMatchTest, Scoring) { |
| 128 // We use NowFromSystemTime() because MakeURLRow uses the same function | 135 // We use NowFromSystemTime() because MakeURLRow uses the same function |
| 129 // to calculate last visit time when building a row. | 136 // to calculate last visit time when building a row. |
| 130 base::Time now = base::Time::NowFromSystemTime(); | 137 base::Time now = base::Time::NowFromSystemTime(); |
| 131 | 138 |
| 132 history::URLRow row_a(MakeURLRow("http://fedcba", "abcd bcd", 3, 30, 1)); | 139 history::URLRow row_a(MakeURLRow("http://fedcba", "abcd bcd", 3, 30, 1)); |
| 133 RowWordStarts word_starts_a; | 140 RowWordStarts word_starts_a; |
| (...skipping 409 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 543 } | 550 } |
| 544 | 551 |
| 545 // This function only tests scoring of single terms that match exactly | 552 // This function only tests scoring of single terms that match exactly |
| 546 // once somewhere in the URL or title. | 553 // once somewhere in the URL or title. |
| 547 TEST_F(ScoredHistoryMatchTest, GetTopicalityScore) { | 554 TEST_F(ScoredHistoryMatchTest, GetTopicalityScore) { |
| 548 base::string16 url = ASCIIToUTF16( | 555 base::string16 url = ASCIIToUTF16( |
| 549 "http://abc.def.com/path1/path2?" | 556 "http://abc.def.com/path1/path2?" |
| 550 "arg1=val1&arg2=val2#hash_component"); | 557 "arg1=val1&arg2=val2#hash_component"); |
| 551 base::string16 title = ASCIIToUTF16("here is a title"); | 558 base::string16 title = ASCIIToUTF16("here is a title"); |
| 552 const float hostname_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 559 const float hostname_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 553 ASCIIToUTF16("abc"), url, title); | 560 ASCIIToUTF16("abc"), url, title); |
|
Peter Kasting (2016/10/17 23:34:37): Nit: Define a lambda here (not sure how git cl for… [comment truncated in source]
Mark P (2016/10/18 22:57:29): Good idea. I find my aging brain doesn't think of… [comment truncated in source]
| |
| 554 const float hostname_mid_word_score = | 561 const float hostname_mid_word_score = |
| 555 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("bc"), url, | 562 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("bc"), url, |
| 556 title); | 563 title); |
| 564 const float hostname_score_preceeding_punctuation = | |
| 565 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("://abc"), url, | |
| 566 title); | |
| 557 const float domain_name_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 567 const float domain_name_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 558 ASCIIToUTF16("def"), url, title); | 568 ASCIIToUTF16("def"), url, title); |
| 559 const float domain_name_mid_word_score = | 569 const float domain_name_mid_word_score = |
| 560 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("ef"), url, | 570 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("ef"), url, |
| 561 title); | 571 title); |
| 572 const float domain_name_score_preceeding_dot = | |
| 573 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16(".def"), url, | |
| 574 title); | |
| 562 const float tld_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 575 const float tld_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 563 ASCIIToUTF16("com"), url, title); | 576 ASCIIToUTF16("com"), url, title); |
| 564 const float tld_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 577 const float tld_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 565 ASCIIToUTF16("om"), url, title); | 578 ASCIIToUTF16("om"), url, title); |
| 579 const float tld_score_preceeding_dot = | |
| 580 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16(".com"), url, | |
| 581 title); | |
| 566 const float path_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 582 const float path_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 567 ASCIIToUTF16("path1"), url, title); | 583 ASCIIToUTF16("path1"), url, title); |
| 568 const float path_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 584 const float path_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 569 ASCIIToUTF16("ath1"), url, title); | 585 ASCIIToUTF16("ath1"), url, title); |
| 586 const float path_score_preceeding_slash = | |
| 587 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("/path1"), url, | |
| 588 title); | |
| 570 const float arg_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 589 const float arg_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 571 ASCIIToUTF16("arg2"), url, title); | 590 ASCIIToUTF16("arg1"), url, title); |
| 572 const float arg_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 591 const float arg_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 573 ASCIIToUTF16("rg2"), url, title); | 592 ASCIIToUTF16("rg1"), url, title); |
| 593 const float arg_score_preceeding_question_mark = | |
| 594 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("?arg1"), url, | |
| 595 title); | |
| 574 const float protocol_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 596 const float protocol_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 575 ASCIIToUTF16("htt"), url, title); | 597 ASCIIToUTF16("htt"), url, title); |
| 576 const float protocol_mid_word_score = | 598 const float protocol_mid_word_score = |
| 577 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("tt"), url, | 599 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("tt"), url, |
| 578 title); | 600 title); |
| 579 const float title_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 601 const float title_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 580 ASCIIToUTF16("her"), url, title); | 602 ASCIIToUTF16("her"), url, title); |
| 581 const float title_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( | 603 const float title_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 582 ASCIIToUTF16("er"), url, title); | 604 ASCIIToUTF16("er"), url, title); |
| 583 // Verify hostname and domain name > path > arg. | 605 // Verify hostname and domain name > path > arg. |
| 584 EXPECT_GT(hostname_score, path_score); | 606 EXPECT_GT(hostname_score, path_score); |
| 585 EXPECT_GT(domain_name_score, path_score); | 607 EXPECT_GT(domain_name_score, path_score); |
| 586 EXPECT_GT(path_score, arg_score); | 608 EXPECT_GT(path_score, arg_score); |
| 609 // Verify leading punctuation doesn't confuse scoring. | |
| 610 EXPECT_EQ(hostname_score, hostname_score_preceeding_punctuation); | |
| 611 EXPECT_EQ(domain_name_score, domain_name_score_preceeding_dot); | |
| 612 EXPECT_EQ(tld_score, tld_score_preceeding_dot); | |
| 613 EXPECT_EQ(path_score, path_score_preceeding_slash); | |
| 614 EXPECT_EQ(arg_score, arg_score_preceeding_question_mark); | |
| 587 // Verify that domain name > path and domain name > arg for non-word | 615 // Verify that domain name > path and domain name > arg for non-word |
| 588 // boundaries. | 616 // boundaries. |
| 589 EXPECT_GT(hostname_mid_word_score, path_mid_word_score); | 617 EXPECT_GT(hostname_mid_word_score, path_mid_word_score); |
| 590 EXPECT_GT(domain_name_mid_word_score, path_mid_word_score); | 618 EXPECT_GT(domain_name_mid_word_score, path_mid_word_score); |
| 591 EXPECT_GT(domain_name_mid_word_score, arg_mid_word_score); | 619 EXPECT_GT(domain_name_mid_word_score, arg_mid_word_score); |
| 592 EXPECT_GT(hostname_mid_word_score, arg_mid_word_score); | 620 EXPECT_GT(hostname_mid_word_score, arg_mid_word_score); |
| 593 // Also verify that the matches at non-word-boundaries all score | 621 // Also verify that the matches at non-word-boundaries all score |
| 594 // worse than the matches at word boundaries. These three sets suffice. | 622 // worse than the matches at word boundaries. These three sets suffice. |
| 595 EXPECT_GT(arg_score, hostname_mid_word_score); | 623 EXPECT_GT(arg_score, hostname_mid_word_score); |
| 596 EXPECT_GT(arg_score, domain_name_mid_word_score); | 624 EXPECT_GT(arg_score, domain_name_mid_word_score); |
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 654 | 682 |
| 655 EXPECT_TRUE( | 683 EXPECT_TRUE( |
| 656 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets)); | 684 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets)); |
| 657 EXPECT_THAT(hqp_buckets, ElementsAre(Pair(0.0, 400), Pair(1.5, 600), | 685 EXPECT_THAT(hqp_buckets, ElementsAre(Pair(0.0, 400), Pair(1.5, 600), |
| 658 Pair(12.0, 1300), Pair(20.0, 1399))); | 686 Pair(12.0, 1300), Pair(20.0, 1399))); |
| 659 // invalid string. | 687 // invalid string. |
| 660 buckets_str = "0.0,400,1.5,600"; | 688 buckets_str = "0.0,400,1.5,600"; |
| 661 EXPECT_FALSE( | 689 EXPECT_FALSE( |
| 662 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets)); | 690 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets)); |
| 663 } | 691 } |
| OLD | NEW |