Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(419)

Side by Side Diff: components/omnibox/browser/scored_history_match_unittest.cc

Issue 2421373003: Omnibox: Improve HQP Scoring for Terms that Start with Punctuation (Closed)
Patch Set: restore dcheck Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/omnibox/browser/scored_history_match.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/scored_history_match.h" 5 #include "components/omnibox/browser/scored_history_match.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <memory> 8 #include <memory>
9 9
10 #include "base/auto_reset.h" 10 #include "base/auto_reset.h"
11 #include "base/bind.h" 11 #include "base/bind.h"
12 #include "base/i18n/break_iterator.h"
12 #include "base/memory/ptr_util.h" 13 #include "base/memory/ptr_util.h"
13 #include "base/strings/string16.h" 14 #include "base/strings/string16.h"
14 #include "base/strings/utf_string_conversions.h" 15 #include "base/strings/utf_string_conversions.h"
15 #include "components/search_engines/search_terms_data.h" 16 #include "components/search_engines/search_terms_data.h"
16 #include "components/search_engines/template_url.h" 17 #include "components/search_engines/template_url.h"
17 #include "components/search_engines/template_url_service.h" 18 #include "components/search_engines/template_url_service.h"
18 #include "components/search_engines/template_url_service_client.h" 19 #include "components/search_engines/template_url_service_client.h"
19 #include "testing/gmock/include/gmock/gmock.h" 20 #include "testing/gmock/include/gmock/gmock.h"
20 #include "testing/gtest/include/gtest/gtest.h" 21 #include "testing/gtest/include/gtest/gtest.h"
21 22
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
112 const base::string16& title) { 113 const base::string16& title) {
113 // Make an empty match and simply populate the fields we need in order 114 // Make an empty match and simply populate the fields we need in order
114 // to call GetTopicalityScore(). 115 // to call GetTopicalityScore().
115 ScoredHistoryMatch scored_match; 116 ScoredHistoryMatch scored_match;
116 scored_match.url_matches = MatchTermInString(term, url, 0); 117 scored_match.url_matches = MatchTermInString(term, url, 0);
117 scored_match.title_matches = MatchTermInString(term, title, 0); 118 scored_match.title_matches = MatchTermInString(term, title, 0);
118 scored_match.topicality_threshold_ = -1; 119 scored_match.topicality_threshold_ = -1;
119 RowWordStarts word_starts; 120 RowWordStarts word_starts;
120 String16SetFromString16(url, &word_starts.url_word_starts_); 121 String16SetFromString16(url, &word_starts.url_word_starts_);
121 String16SetFromString16(title, &word_starts.title_word_starts_); 122 String16SetFromString16(title, &word_starts.title_word_starts_);
122 WordStarts one_word_no_offset(1, 0u); 123 WordStarts term_word_starts(1, 0u);
123 return scored_match.GetTopicalityScore(1, url, one_word_no_offset, 124 base::i18n::BreakIterator iter(term, base::i18n::BreakIterator::BREAK_WORD);
125 if (iter.Init()) {
126 // Find the first word start.
127 while (iter.Advance() && !iter.IsWord()) {}
128 term_word_starts[0] = iter.prev();
129 }
130 return scored_match.GetTopicalityScore(1, url, term_word_starts,
124 word_starts); 131 word_starts);
125 } 132 }
126 133
127 TEST_F(ScoredHistoryMatchTest, Scoring) { 134 TEST_F(ScoredHistoryMatchTest, Scoring) {
128 // We use NowFromSystemTime() because MakeURLRow uses the same function 135 // We use NowFromSystemTime() because MakeURLRow uses the same function
129 // to calculate last visit time when building a row. 136 // to calculate last visit time when building a row.
130 base::Time now = base::Time::NowFromSystemTime(); 137 base::Time now = base::Time::NowFromSystemTime();
131 138
132 history::URLRow row_a(MakeURLRow("http://fedcba", "abcd bcd", 3, 30, 1)); 139 history::URLRow row_a(MakeURLRow("http://fedcba", "abcd bcd", 3, 30, 1));
133 RowWordStarts word_starts_a; 140 RowWordStarts word_starts_a;
(...skipping 409 matching lines...) Expand 10 before | Expand all | Expand 10 after
543 } 550 }
544 551
545 // This function only tests scoring of single terms that match exactly 552 // This function only tests scoring of single terms that match exactly
546 // once somewhere in the URL or title. 553 // once somewhere in the URL or title.
547 TEST_F(ScoredHistoryMatchTest, GetTopicalityScore) { 554 TEST_F(ScoredHistoryMatchTest, GetTopicalityScore) {
548 base::string16 url = ASCIIToUTF16( 555 base::string16 url = ASCIIToUTF16(
549 "http://abc.def.com/path1/path2?" 556 "http://abc.def.com/path1/path2?"
550 "arg1=val1&arg2=val2#hash_component"); 557 "arg1=val1&arg2=val2#hash_component");
551 base::string16 title = ASCIIToUTF16("here is a title"); 558 base::string16 title = ASCIIToUTF16("here is a title");
552 const float hostname_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 559 const float hostname_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
553 ASCIIToUTF16("abc"), url, title); 560 ASCIIToUTF16("abc"), url, title);
Peter Kasting 2016/10/17 23:34:37 Nit: Define a lambda here (not sure how git cl for
Mark P 2016/10/18 22:57:29 Good idea. I find my aging brain doesn't think of
554 const float hostname_mid_word_score = 561 const float hostname_mid_word_score =
555 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("bc"), url, 562 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("bc"), url,
556 title); 563 title);
564 const float hostname_score_preceeding_punctuation =
565 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("://abc"), url,
566 title);
557 const float domain_name_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 567 const float domain_name_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
558 ASCIIToUTF16("def"), url, title); 568 ASCIIToUTF16("def"), url, title);
559 const float domain_name_mid_word_score = 569 const float domain_name_mid_word_score =
560 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("ef"), url, 570 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("ef"), url,
561 title); 571 title);
572 const float domain_name_score_preceeding_dot =
573 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16(".def"), url,
574 title);
562 const float tld_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 575 const float tld_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
563 ASCIIToUTF16("com"), url, title); 576 ASCIIToUTF16("com"), url, title);
564 const float tld_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 577 const float tld_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
565 ASCIIToUTF16("om"), url, title); 578 ASCIIToUTF16("om"), url, title);
579 const float tld_score_preceeding_dot =
580 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16(".com"), url,
581 title);
566 const float path_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 582 const float path_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
567 ASCIIToUTF16("path1"), url, title); 583 ASCIIToUTF16("path1"), url, title);
568 const float path_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 584 const float path_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
569 ASCIIToUTF16("ath1"), url, title); 585 ASCIIToUTF16("ath1"), url, title);
586 const float path_score_preceeding_slash =
587 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("/path1"), url,
588 title);
570 const float arg_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 589 const float arg_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
571 ASCIIToUTF16("arg2"), url, title); 590 ASCIIToUTF16("arg1"), url, title);
572 const float arg_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 591 const float arg_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
573 ASCIIToUTF16("rg2"), url, title); 592 ASCIIToUTF16("rg1"), url, title);
593 const float arg_score_preceeding_question_mark =
594 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("?arg1"), url,
595 title);
574 const float protocol_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 596 const float protocol_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
575 ASCIIToUTF16("htt"), url, title); 597 ASCIIToUTF16("htt"), url, title);
576 const float protocol_mid_word_score = 598 const float protocol_mid_word_score =
577 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("tt"), url, 599 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("tt"), url,
578 title); 600 title);
579 const float title_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 601 const float title_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
580 ASCIIToUTF16("her"), url, title); 602 ASCIIToUTF16("her"), url, title);
581 const float title_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 603 const float title_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
582 ASCIIToUTF16("er"), url, title); 604 ASCIIToUTF16("er"), url, title);
583 // Verify hostname and domain name > path > arg. 605 // Verify hostname and domain name > path > arg.
584 EXPECT_GT(hostname_score, path_score); 606 EXPECT_GT(hostname_score, path_score);
585 EXPECT_GT(domain_name_score, path_score); 607 EXPECT_GT(domain_name_score, path_score);
586 EXPECT_GT(path_score, arg_score); 608 EXPECT_GT(path_score, arg_score);
609 // Verify leading punctuation doesn't confuse scoring.
610 EXPECT_EQ(hostname_score, hostname_score_preceeding_punctuation);
611 EXPECT_EQ(domain_name_score, domain_name_score_preceeding_dot);
612 EXPECT_EQ(tld_score, tld_score_preceeding_dot);
613 EXPECT_EQ(path_score, path_score_preceeding_slash);
614 EXPECT_EQ(arg_score, arg_score_preceeding_question_mark);
587 // Verify that domain name > path and domain name > arg for non-word 615 // Verify that domain name > path and domain name > arg for non-word
588 // boundaries. 616 // boundaries.
589 EXPECT_GT(hostname_mid_word_score, path_mid_word_score); 617 EXPECT_GT(hostname_mid_word_score, path_mid_word_score);
590 EXPECT_GT(domain_name_mid_word_score, path_mid_word_score); 618 EXPECT_GT(domain_name_mid_word_score, path_mid_word_score);
591 EXPECT_GT(domain_name_mid_word_score, arg_mid_word_score); 619 EXPECT_GT(domain_name_mid_word_score, arg_mid_word_score);
592 EXPECT_GT(hostname_mid_word_score, arg_mid_word_score); 620 EXPECT_GT(hostname_mid_word_score, arg_mid_word_score);
593 // Also verify that the matches at non-word-boundaries all score 621 // Also verify that the matches at non-word-boundaries all score
594 // worse than the matches at word boundaries. These three sets suffice. 622 // worse than the matches at word boundaries. These three sets suffice.
595 EXPECT_GT(arg_score, hostname_mid_word_score); 623 EXPECT_GT(arg_score, hostname_mid_word_score);
596 EXPECT_GT(arg_score, domain_name_mid_word_score); 624 EXPECT_GT(arg_score, domain_name_mid_word_score);
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
654 682
655 EXPECT_TRUE( 683 EXPECT_TRUE(
656 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets)); 684 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets));
657 EXPECT_THAT(hqp_buckets, ElementsAre(Pair(0.0, 400), Pair(1.5, 600), 685 EXPECT_THAT(hqp_buckets, ElementsAre(Pair(0.0, 400), Pair(1.5, 600),
658 Pair(12.0, 1300), Pair(20.0, 1399))); 686 Pair(12.0, 1300), Pair(20.0, 1399)));
659 // invalid string. 687 // invalid string.
660 buckets_str = "0.0,400,1.5,600"; 688 buckets_str = "0.0,400,1.5,600";
661 EXPECT_FALSE( 689 EXPECT_FALSE(
662 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets)); 690 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets));
663 } 691 }
OLD | NEW
« no previous file with comments | « components/omnibox/browser/scored_history_match.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698