OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "components/omnibox/scored_history_match.h" |
| 6 |
| 7 #include <algorithm> |
| 8 |
| 9 #include "base/auto_reset.h" |
| 10 #include "base/bind.h" |
| 11 #include "base/memory/scoped_ptr.h" |
| 12 #include "base/strings/string16.h" |
| 13 #include "base/strings/utf_string_conversions.h" |
| 14 #include "testing/gmock/include/gmock/gmock.h" |
| 15 #include "testing/gtest/include/gtest/gtest.h" |
| 16 |
| 17 using base::ASCIIToUTF16; |
| 18 using testing::ElementsAre; |
| 19 using testing::Pair; |
| 20 |
| 21 namespace { |
| 22 |
| 23 // Returns a VisitInfoVector that includes |num_visits| spread over the |
| 24 // last |frequency|*|num_visits| days (relative to |now|). A frequency of |
| 25 // one means one visit each day, two means every other day, etc. |
| 26 VisitInfoVector CreateVisitInfoVector(int num_visits, |
| 27 int frequency, |
| 28 base::Time now) { |
| 29 VisitInfoVector visits; |
| 30 for (int i = 0; i < num_visits; ++i) { |
| 31 visits.push_back( |
| 32 std::make_pair(now - base::TimeDelta::FromDays(i * frequency), |
| 33 ui::PAGE_TRANSITION_LINK)); |
| 34 } |
| 35 return visits; |
| 36 } |
| 37 |
| 38 } // namespace |
| 39 |
| 40 class ScoredHistoryMatchTest : public testing::Test { |
| 41 protected: |
| 42 // Convenience function to create a history::URLRow with basic data for |url|, |
| 43 // |title|, |visit_count|, and |typed_count|. |days_since_last_visit| gives |
| 44 // the number of days ago to which to set the URL's last_visit. |
| 45 history::URLRow MakeURLRow(const char* url, |
| 46 const char* title, |
| 47 int visit_count, |
| 48 int days_since_last_visit, |
| 49 int typed_count); |
| 50 |
| 51 // Convenience function to set the word starts information from a |
| 52 // history::URLRow's URL and title. |
| 53 void PopulateWordStarts(const history::URLRow& url_row, |
| 54 RowWordStarts* word_starts); |
| 55 |
| 56 // Convenience functions for easily creating vectors of search terms. |
| 57 String16Vector Make1Term(const char* term) const; |
| 58 String16Vector Make2Terms(const char* term_1, const char* term_2) const; |
| 59 |
| 60 // Convenience function for GetTopicalityScore() that builds the term match |
| 61 // and word break information automatically that are needed to call |
| 62 // GetTopicalityScore(). It only works for scoring a single term, not |
| 63 // multiple terms. |
| 64 float GetTopicalityScoreOfTermAgainstURLAndTitle(const base::string16& term, |
| 65 const base::string16& url, |
| 66 const base::string16& title); |
| 67 }; |
| 68 |
| 69 history::URLRow ScoredHistoryMatchTest::MakeURLRow(const char* url, |
| 70 const char* title, |
| 71 int visit_count, |
| 72 int days_since_last_visit, |
| 73 int typed_count) { |
| 74 history::URLRow row(GURL(url), 0); |
| 75 row.set_title(ASCIIToUTF16(title)); |
| 76 row.set_visit_count(visit_count); |
| 77 row.set_typed_count(typed_count); |
| 78 row.set_last_visit(base::Time::NowFromSystemTime() - |
| 79 base::TimeDelta::FromDays(days_since_last_visit)); |
| 80 return row; |
| 81 } |
| 82 |
| 83 void ScoredHistoryMatchTest::PopulateWordStarts(const history::URLRow& url_row, |
| 84 RowWordStarts* word_starts) { |
| 85 String16SetFromString16(ASCIIToUTF16(url_row.url().spec()), |
| 86 &word_starts->url_word_starts_); |
| 87 String16SetFromString16(url_row.title(), &word_starts->title_word_starts_); |
| 88 } |
| 89 |
| 90 String16Vector ScoredHistoryMatchTest::Make1Term(const char* term) const { |
| 91 String16Vector original_terms; |
| 92 original_terms.push_back(ASCIIToUTF16(term)); |
| 93 return original_terms; |
| 94 } |
| 95 |
| 96 String16Vector ScoredHistoryMatchTest::Make2Terms(const char* term_1, |
| 97 const char* term_2) const { |
| 98 String16Vector original_terms; |
| 99 original_terms.push_back(ASCIIToUTF16(term_1)); |
| 100 original_terms.push_back(ASCIIToUTF16(term_2)); |
| 101 return original_terms; |
| 102 } |
| 103 |
| 104 float ScoredHistoryMatchTest::GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 105 const base::string16& term, |
| 106 const base::string16& url, |
| 107 const base::string16& title) { |
| 108 // Make an empty match and simply populate the fields we need in order |
| 109 // to call GetTopicalityScore(). |
| 110 ScoredHistoryMatch scored_match; |
| 111 scored_match.url_matches = MatchTermInString(term, url, 0); |
| 112 scored_match.title_matches = MatchTermInString(term, title, 0); |
| 113 RowWordStarts word_starts; |
| 114 String16SetFromString16(url, &word_starts.url_word_starts_); |
| 115 String16SetFromString16(title, &word_starts.title_word_starts_); |
| 116 WordStarts one_word_no_offset(1, 0u); |
| 117 return scored_match.GetTopicalityScore(1, url, one_word_no_offset, |
| 118 word_starts); |
| 119 } |
| 120 |
| 121 TEST_F(ScoredHistoryMatchTest, Scoring) { |
| 122 // We use NowFromSystemTime() because MakeURLRow uses the same function |
| 123 // to calculate last visit time when building a row. |
| 124 base::Time now = base::Time::NowFromSystemTime(); |
| 125 |
| 126 history::URLRow row_a(MakeURLRow("http://fedcba", "abcd bcd", 3, 30, 1)); |
| 127 RowWordStarts word_starts_a; |
| 128 PopulateWordStarts(row_a, &word_starts_a); |
| 129 WordStarts one_word_no_offset(1, 0u); |
| 130 VisitInfoVector visits_a = CreateVisitInfoVector(3, 30, now); |
| 131 // Mark one visit as typed. |
| 132 visits_a[0].second = ui::PAGE_TRANSITION_TYPED; |
| 133 ScoredHistoryMatch scored_a(row_a, visits_a, std::string(), |
| 134 ASCIIToUTF16("abc"), Make1Term("abc"), |
| 135 one_word_no_offset, word_starts_a, false, now); |
| 136 |
| 137 // Test scores based on visit_count. |
| 138 history::URLRow row_b(MakeURLRow("http://abcdef", "abcd bcd", 10, 30, 1)); |
| 139 RowWordStarts word_starts_b; |
| 140 PopulateWordStarts(row_b, &word_starts_b); |
| 141 VisitInfoVector visits_b = CreateVisitInfoVector(10, 30, now); |
| 142 visits_b[0].second = ui::PAGE_TRANSITION_TYPED; |
| 143 ScoredHistoryMatch scored_b(row_b, visits_b, std::string(), |
| 144 ASCIIToUTF16("abc"), Make1Term("abc"), |
| 145 one_word_no_offset, word_starts_b, false, now); |
| 146 EXPECT_GT(scored_b.raw_score, scored_a.raw_score); |
| 147 |
| 148 // Test scores based on last_visit. |
| 149 history::URLRow row_c(MakeURLRow("http://abcdef", "abcd bcd", 3, 10, 1)); |
| 150 RowWordStarts word_starts_c; |
| 151 PopulateWordStarts(row_c, &word_starts_c); |
| 152 VisitInfoVector visits_c = CreateVisitInfoVector(3, 10, now); |
| 153 visits_c[0].second = ui::PAGE_TRANSITION_TYPED; |
| 154 ScoredHistoryMatch scored_c(row_c, visits_c, std::string(), |
| 155 ASCIIToUTF16("abc"), Make1Term("abc"), |
| 156 one_word_no_offset, word_starts_c, false, now); |
| 157 EXPECT_GT(scored_c.raw_score, scored_a.raw_score); |
| 158 |
| 159 // Test scores based on typed_count. |
| 160 history::URLRow row_d(MakeURLRow("http://abcdef", "abcd bcd", 3, 30, 3)); |
| 161 RowWordStarts word_starts_d; |
| 162 PopulateWordStarts(row_d, &word_starts_d); |
| 163 VisitInfoVector visits_d = CreateVisitInfoVector(3, 30, now); |
| 164 visits_d[0].second = ui::PAGE_TRANSITION_TYPED; |
| 165 visits_d[1].second = ui::PAGE_TRANSITION_TYPED; |
| 166 visits_d[2].second = ui::PAGE_TRANSITION_TYPED; |
| 167 ScoredHistoryMatch scored_d(row_d, visits_d, std::string(), |
| 168 ASCIIToUTF16("abc"), Make1Term("abc"), |
| 169 one_word_no_offset, word_starts_d, false, now); |
| 170 EXPECT_GT(scored_d.raw_score, scored_a.raw_score); |
| 171 |
| 172 // Test scores based on a terms appearing multiple times. |
| 173 history::URLRow row_e(MakeURLRow( |
| 174 "http://csi.csi.csi/csi_csi", |
| 175 "CSI Guide to CSI Las Vegas, CSI New York, CSI Provo", 3, 30, 3)); |
| 176 RowWordStarts word_starts_e; |
| 177 PopulateWordStarts(row_e, &word_starts_e); |
| 178 const VisitInfoVector visits_e = visits_d; |
| 179 ScoredHistoryMatch scored_e(row_e, visits_e, std::string(), |
| 180 ASCIIToUTF16("csi"), Make1Term("csi"), |
| 181 one_word_no_offset, word_starts_e, false, now); |
| 182 EXPECT_LT(scored_e.raw_score, 1400); |
| 183 |
| 184 // Test that a result with only a mid-term match (i.e., not at a word |
| 185 // boundary) scores 0. |
| 186 ScoredHistoryMatch scored_f(row_a, visits_a, std::string(), |
| 187 ASCIIToUTF16("cd"), Make1Term("cd"), |
| 188 one_word_no_offset, word_starts_a, false, now); |
| 189 EXPECT_EQ(scored_f.raw_score, 0); |
| 190 } |
| 191 |
| 192 TEST_F(ScoredHistoryMatchTest, ScoringBookmarks) { |
| 193 // We use NowFromSystemTime() because MakeURLRow uses the same function |
| 194 // to calculate last visit time when building a row. |
| 195 base::Time now = base::Time::NowFromSystemTime(); |
| 196 |
| 197 std::string url_string("http://fedcba"); |
| 198 const GURL url(url_string); |
| 199 history::URLRow row(MakeURLRow(url_string.c_str(), "abcd bcd", 8, 3, 1)); |
| 200 RowWordStarts word_starts; |
| 201 PopulateWordStarts(row, &word_starts); |
| 202 WordStarts one_word_no_offset(1, 0u); |
| 203 VisitInfoVector visits = CreateVisitInfoVector(8, 3, now); |
| 204 ScoredHistoryMatch scored(row, visits, std::string(), ASCIIToUTF16("abc"), |
| 205 Make1Term("abc"), one_word_no_offset, word_starts, |
| 206 false, now); |
| 207 // Now check that if URL is bookmarked then its score increases. |
| 208 base::AutoReset<int> reset(&ScoredHistoryMatch::bookmark_value_, 5); |
| 209 ScoredHistoryMatch scored_with_bookmark( |
| 210 row, visits, std::string(), ASCIIToUTF16("abc"), Make1Term("abc"), |
| 211 one_word_no_offset, word_starts, true, now); |
| 212 EXPECT_GT(scored_with_bookmark.raw_score, scored.raw_score); |
| 213 } |
| 214 |
| 215 TEST_F(ScoredHistoryMatchTest, ScoringTLD) { |
| 216 // We use NowFromSystemTime() because MakeURLRow uses the same function |
| 217 // to calculate last visit time when building a row. |
| 218 base::Time now = base::Time::NowFromSystemTime(); |
| 219 |
| 220 // By default the URL should not be returned for a query that includes "com". |
| 221 std::string url_string("http://fedcba.com/"); |
| 222 const GURL url(url_string); |
| 223 history::URLRow row(MakeURLRow(url_string.c_str(), "", 8, 3, 1)); |
| 224 RowWordStarts word_starts; |
| 225 PopulateWordStarts(row, &word_starts); |
| 226 WordStarts two_words_no_offsets(2, 0u); |
| 227 VisitInfoVector visits = CreateVisitInfoVector(8, 3, now); |
| 228 ScoredHistoryMatch scored(row, visits, std::string(), ASCIIToUTF16("fed com"), |
| 229 Make2Terms("fed", "com"), two_words_no_offsets, |
| 230 word_starts, false, now); |
| 231 EXPECT_EQ(0, scored.raw_score); |
| 232 |
| 233 // Now allow credit for the match in the TLD. |
| 234 base::AutoReset<bool> reset(&ScoredHistoryMatch::allow_tld_matches_, true); |
| 235 ScoredHistoryMatch scored_with_tld( |
| 236 row, visits, std::string(), ASCIIToUTF16("fed com"), |
| 237 Make2Terms("fed", "com"), two_words_no_offsets, word_starts, false, now); |
| 238 EXPECT_GT(scored_with_tld.raw_score, 0); |
| 239 } |
| 240 |
| 241 TEST_F(ScoredHistoryMatchTest, ScoringScheme) { |
| 242 // We use NowFromSystemTime() because MakeURLRow uses the same function |
| 243 // to calculate last visit time when building a row. |
| 244 base::Time now = base::Time::NowFromSystemTime(); |
| 245 |
| 246 // By default the URL should not be returned for a query that includes "http". |
| 247 std::string url_string("http://fedcba/"); |
| 248 const GURL url(url_string); |
| 249 history::URLRow row(MakeURLRow(url_string.c_str(), "", 8, 3, 1)); |
| 250 RowWordStarts word_starts; |
| 251 PopulateWordStarts(row, &word_starts); |
| 252 WordStarts two_words_no_offsets(2, 0u); |
| 253 VisitInfoVector visits = CreateVisitInfoVector(8, 3, now); |
| 254 ScoredHistoryMatch scored(row, visits, std::string(), |
| 255 ASCIIToUTF16("fed http"), Make2Terms("fed", "http"), |
| 256 two_words_no_offsets, word_starts, false, now); |
| 257 EXPECT_EQ(0, scored.raw_score); |
| 258 |
| 259 // Now allow credit for the match in the scheme. |
| 260 base::AutoReset<bool> reset(&ScoredHistoryMatch::allow_scheme_matches_, true); |
| 261 ScoredHistoryMatch scored_with_scheme( |
| 262 row, visits, std::string(), ASCIIToUTF16("fed http"), |
| 263 Make2Terms("fed", "http"), two_words_no_offsets, word_starts, false, now); |
| 264 EXPECT_GT(scored_with_scheme.raw_score, 0); |
| 265 } |
| 266 |
| 267 TEST_F(ScoredHistoryMatchTest, Inlining) { |
| 268 // We use NowFromSystemTime() because MakeURLRow uses the same function |
| 269 // to calculate last visit time when building a row. |
| 270 base::Time now = base::Time::NowFromSystemTime(); |
| 271 RowWordStarts word_starts; |
| 272 WordStarts one_word_no_offset(1, 0u); |
| 273 VisitInfoVector visits; |
| 274 |
| 275 { |
| 276 history::URLRow row( |
| 277 MakeURLRow("http://www.google.com", "abcdef", 3, 30, 1)); |
| 278 PopulateWordStarts(row, &word_starts); |
| 279 ScoredHistoryMatch scored_a(row, visits, std::string(), ASCIIToUTF16("g"), |
| 280 Make1Term("g"), one_word_no_offset, word_starts, |
| 281 false, now); |
| 282 EXPECT_TRUE(scored_a.can_inline); |
| 283 EXPECT_FALSE(scored_a.match_in_scheme); |
| 284 ScoredHistoryMatch scored_b(row, visits, std::string(), ASCIIToUTF16("w"), |
| 285 Make1Term("w"), one_word_no_offset, word_starts, |
| 286 false, now); |
| 287 EXPECT_TRUE(scored_b.can_inline); |
| 288 EXPECT_FALSE(scored_b.match_in_scheme); |
| 289 ScoredHistoryMatch scored_c(row, visits, std::string(), ASCIIToUTF16("h"), |
| 290 Make1Term("h"), one_word_no_offset, word_starts, |
| 291 false, now); |
| 292 EXPECT_TRUE(scored_c.can_inline); |
| 293 EXPECT_TRUE(scored_c.match_in_scheme); |
| 294 ScoredHistoryMatch scored_d(row, visits, std::string(), ASCIIToUTF16("o"), |
| 295 Make1Term("o"), one_word_no_offset, word_starts, |
| 296 false, now); |
| 297 EXPECT_FALSE(scored_d.can_inline); |
| 298 EXPECT_FALSE(scored_d.match_in_scheme); |
| 299 } |
| 300 |
| 301 { |
| 302 history::URLRow row(MakeURLRow("http://teams.foo.com", "abcdef", 3, 30, 1)); |
| 303 PopulateWordStarts(row, &word_starts); |
| 304 ScoredHistoryMatch scored_a(row, visits, std::string(), ASCIIToUTF16("t"), |
| 305 Make1Term("t"), one_word_no_offset, word_starts, |
| 306 false, now); |
| 307 EXPECT_TRUE(scored_a.can_inline); |
| 308 EXPECT_FALSE(scored_a.match_in_scheme); |
| 309 ScoredHistoryMatch scored_b(row, visits, std::string(), ASCIIToUTF16("f"), |
| 310 Make1Term("f"), one_word_no_offset, word_starts, |
| 311 false, now); |
| 312 EXPECT_FALSE(scored_b.can_inline); |
| 313 EXPECT_FALSE(scored_b.match_in_scheme); |
| 314 ScoredHistoryMatch scored_c(row, visits, std::string(), ASCIIToUTF16("o"), |
| 315 Make1Term("o"), one_word_no_offset, word_starts, |
| 316 false, now); |
| 317 EXPECT_FALSE(scored_c.can_inline); |
| 318 EXPECT_FALSE(scored_c.match_in_scheme); |
| 319 } |
| 320 |
| 321 { |
| 322 history::URLRow row( |
| 323 MakeURLRow("https://www.testing.com", "abcdef", 3, 30, 1)); |
| 324 PopulateWordStarts(row, &word_starts); |
| 325 ScoredHistoryMatch scored_a(row, visits, std::string(), ASCIIToUTF16("t"), |
| 326 Make1Term("t"), one_word_no_offset, word_starts, |
| 327 false, now); |
| 328 EXPECT_TRUE(scored_a.can_inline); |
| 329 EXPECT_FALSE(scored_a.match_in_scheme); |
| 330 ScoredHistoryMatch scored_b(row, visits, std::string(), ASCIIToUTF16("h"), |
| 331 Make1Term("h"), one_word_no_offset, word_starts, |
| 332 false, now); |
| 333 EXPECT_TRUE(scored_b.can_inline); |
| 334 EXPECT_TRUE(scored_b.match_in_scheme); |
| 335 ScoredHistoryMatch scored_c(row, visits, std::string(), ASCIIToUTF16("w"), |
| 336 Make1Term("w"), one_word_no_offset, word_starts, |
| 337 false, now); |
| 338 EXPECT_TRUE(scored_c.can_inline); |
| 339 EXPECT_FALSE(scored_c.match_in_scheme); |
| 340 } |
| 341 |
| 342 { |
| 343 history::URLRow row( |
| 344 MakeURLRow("http://www.xn--1lq90ic7f1rc.cn/xnblah", "abcd", 3, 30, 1)); |
| 345 PopulateWordStarts(row, &word_starts); |
| 346 ScoredHistoryMatch scored_a(row, visits, "zh-CN", ASCIIToUTF16("x"), |
| 347 Make1Term("x"), one_word_no_offset, word_starts, |
| 348 false, now); |
| 349 EXPECT_FALSE(scored_a.can_inline); |
| 350 EXPECT_FALSE(scored_a.match_in_scheme); |
| 351 ScoredHistoryMatch scored_b(row, visits, "zh-CN", ASCIIToUTF16("xn"), |
| 352 Make1Term("xn"), one_word_no_offset, |
| 353 word_starts, false, now); |
| 354 EXPECT_FALSE(scored_b.can_inline); |
| 355 EXPECT_FALSE(scored_b.match_in_scheme); |
| 356 ScoredHistoryMatch scored_c(row, visits, "zh-CN", ASCIIToUTF16("w"), |
| 357 Make1Term("w"), one_word_no_offset, |
| 358 word_starts, false, now); |
| 359 EXPECT_TRUE(scored_c.can_inline); |
| 360 EXPECT_FALSE(scored_c.match_in_scheme); |
| 361 } |
| 362 } |
| 363 |
| 364 TEST_F(ScoredHistoryMatchTest, GetTopicalityScoreTrailingSlash) { |
| 365 const float hostname = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 366 ASCIIToUTF16("def"), ASCIIToUTF16("http://abc.def.com/"), |
| 367 ASCIIToUTF16("Non-Matching Title")); |
| 368 const float hostname_no_slash = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 369 ASCIIToUTF16("def"), ASCIIToUTF16("http://abc.def.com"), |
| 370 ASCIIToUTF16("Non-Matching Title")); |
| 371 EXPECT_EQ(hostname_no_slash, hostname); |
| 372 } |
| 373 |
| 374 // This function only tests scoring of single terms that match exactly |
| 375 // once somewhere in the URL or title. |
| 376 TEST_F(ScoredHistoryMatchTest, GetTopicalityScore) { |
| 377 base::string16 url = ASCIIToUTF16( |
| 378 "http://abc.def.com/path1/path2?" |
| 379 "arg1=val1&arg2=val2#hash_component"); |
| 380 base::string16 title = ASCIIToUTF16("here is a title"); |
| 381 const float hostname_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 382 ASCIIToUTF16("abc"), url, title); |
| 383 const float hostname_mid_word_score = |
| 384 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("bc"), url, |
| 385 title); |
| 386 const float domain_name_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 387 ASCIIToUTF16("def"), url, title); |
| 388 const float domain_name_mid_word_score = |
| 389 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("ef"), url, |
| 390 title); |
| 391 const float tld_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 392 ASCIIToUTF16("com"), url, title); |
| 393 const float tld_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 394 ASCIIToUTF16("om"), url, title); |
| 395 const float path_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 396 ASCIIToUTF16("path1"), url, title); |
| 397 const float path_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 398 ASCIIToUTF16("ath1"), url, title); |
| 399 const float arg_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 400 ASCIIToUTF16("arg2"), url, title); |
| 401 const float arg_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 402 ASCIIToUTF16("rg2"), url, title); |
| 403 const float protocol_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 404 ASCIIToUTF16("htt"), url, title); |
| 405 const float protocol_mid_word_score = |
| 406 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("tt"), url, |
| 407 title); |
| 408 const float title_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 409 ASCIIToUTF16("her"), url, title); |
| 410 const float title_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( |
| 411 ASCIIToUTF16("er"), url, title); |
| 412 // Verify hostname and domain name > path > arg. |
| 413 EXPECT_GT(hostname_score, path_score); |
| 414 EXPECT_GT(domain_name_score, path_score); |
| 415 EXPECT_GT(path_score, arg_score); |
| 416 // Verify that domain name > path and domain name > arg for non-word |
| 417 // boundaries. |
| 418 EXPECT_GT(hostname_mid_word_score, path_mid_word_score); |
| 419 EXPECT_GT(domain_name_mid_word_score, path_mid_word_score); |
| 420 EXPECT_GT(domain_name_mid_word_score, arg_mid_word_score); |
| 421 EXPECT_GT(hostname_mid_word_score, arg_mid_word_score); |
| 422 // Also verify that the matches at non-word-boundaries all score |
| 423 // worse than the matches at word boundaries. These three sets suffice. |
| 424 EXPECT_GT(arg_score, hostname_mid_word_score); |
| 425 EXPECT_GT(arg_score, domain_name_mid_word_score); |
| 426 EXPECT_GT(title_score, title_mid_word_score); |
| 427 // Check that title matches fit somewhere reasonable compared to the |
| 428 // various types of URL matches. |
| 429 EXPECT_GT(title_score, arg_score); |
| 430 EXPECT_GT(arg_score, title_mid_word_score); |
| 431 // Finally, verify that protocol matches and top level domain name |
| 432 // matches (.com, .net, etc.) score worse than some of the mid-word |
| 433 // matches that actually count. |
| 434 EXPECT_GT(hostname_mid_word_score, protocol_score); |
| 435 EXPECT_GT(hostname_mid_word_score, protocol_mid_word_score); |
| 436 EXPECT_GT(hostname_mid_word_score, tld_score); |
| 437 EXPECT_GT(hostname_mid_word_score, tld_mid_word_score); |
| 438 } |
| 439 |
| 440 // Test the function GetFinalRelevancyScore(). |
| 441 TEST_F(ScoredHistoryMatchTest, GetFinalRelevancyScore) { |
| 442 // hqp_relevance_buckets = "0.0:100,1.0:200,4.0:500,8.0:900,10.0:1000"; |
| 443 std::vector<ScoredHistoryMatch::ScoreMaxRelevance> hqp_buckets; |
| 444 hqp_buckets.push_back(std::make_pair(0.0, 100)); |
| 445 hqp_buckets.push_back(std::make_pair(1.0, 200)); |
| 446 hqp_buckets.push_back(std::make_pair(4.0, 500)); |
| 447 hqp_buckets.push_back(std::make_pair(8.0, 900)); |
| 448 hqp_buckets.push_back(std::make_pair(10.0, 1000)); |
| 449 // Check when topicality score is zero. |
| 450 float topicality_score = 0.0; |
| 451 float frequency_score = 10.0; |
| 452 // intermediate_score = 0.0 * 10.0 = 0.0. |
| 453 EXPECT_EQ(0, ScoredHistoryMatch::GetFinalRelevancyScore( |
| 454 topicality_score, frequency_score, hqp_buckets)); |
| 455 |
| 456 // Check when intermediate score falls at the border range. |
| 457 topicality_score = 0.4f; |
| 458 frequency_score = 10.0f; |
| 459 // intermediate_score = 0.5 * 10.0 = 4.0. |
| 460 EXPECT_EQ(500, ScoredHistoryMatch::GetFinalRelevancyScore( |
| 461 topicality_score, frequency_score, hqp_buckets)); |
| 462 |
| 463 // Checking the score that falls into one of the buckets. |
| 464 topicality_score = 0.5f; |
| 465 frequency_score = 10.0f; |
| 466 // intermediate_score = 0.5 * 10.0 = 5.0. |
| 467 EXPECT_EQ(600, // 500 + (((900 - 500)/(8 -4)) * 1) = 600. |
| 468 ScoredHistoryMatch::GetFinalRelevancyScore( |
| 469 topicality_score, frequency_score, hqp_buckets)); |
| 470 |
| 471 // Never give the score greater than maximum specified. |
| 472 topicality_score = 0.5f; |
| 473 frequency_score = 22.0f; |
| 474 // intermediate_score = 0.5 * 22.0 = 11.0 |
| 475 EXPECT_EQ(1000, ScoredHistoryMatch::GetFinalRelevancyScore( |
| 476 topicality_score, frequency_score, hqp_buckets)); |
| 477 } |
| 478 |
| 479 // Test the function GetHQPBucketsFromString(). |
| 480 TEST_F(ScoredHistoryMatchTest, GetHQPBucketsFromString) { |
| 481 std::string buckets_str = "0.0:400,1.5:600,12.0:1300,20.0:1399"; |
| 482 std::vector<ScoredHistoryMatch::ScoreMaxRelevance> hqp_buckets; |
| 483 |
| 484 EXPECT_TRUE( |
| 485 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets)); |
| 486 EXPECT_THAT(hqp_buckets, ElementsAre(Pair(0.0, 400), Pair(1.5, 600), |
| 487 Pair(12.0, 1300), Pair(20.0, 1399))); |
| 488 // invalid string. |
| 489 buckets_str = "0.0,400,1.5,600"; |
| 490 EXPECT_FALSE( |
| 491 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets)); |
| 492 } |
OLD | NEW |