Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(209)

Unified Diff: chrome/browser/history/in_memory_url_index_unittest.cc

Issue 6652008: Implemented substring matching within page titles. Fixed bug where URL was be... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: '' Created 9 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/history/in_memory_url_index_unittest.cc
===================================================================
--- chrome/browser/history/in_memory_url_index_unittest.cc (revision 77643)
+++ chrome/browser/history/in_memory_url_index_unittest.cc (working copy)
@@ -48,8 +48,7 @@
PathService::Get(chrome::DIR_TEST_DATA, &history_proto_path);
history_proto_path = history_proto_path.Append(
FILE_PATH_LITERAL("History"));
- history_proto_path = history_proto_path.Append(
- FILE_PATH_LITERAL("url_history_provider_test.db.txt"));
+ history_proto_path = history_proto_path.Append(TestDBName());
EXPECT_TRUE(file_util::PathExists(history_proto_path));
std::ifstream proto_file(history_proto_path.value().c_str());
@@ -82,7 +81,7 @@
"SELECT" HISTORY_URL_ROW_FIELDS "FROM urls;"));
EXPECT_TRUE(statement);
base::Time time_right_now = base::Time::NowFromSystemTime();
- base::TimeDelta day_delta = base::TimeDelta::FromDays(1);
+ base::TimeDelta day_delta = base::TimeDelta::FromDays(1);
{
sql::Transaction transaction(&db);
transaction.Begin();
@@ -99,31 +98,69 @@
}
}
+ virtual FilePath::StringType TestDBName() const {
+ return FILE_PATH_LITERAL("url_history_provider_test.db.txt");
+ }
+
+ URLRow MakeURLRow(const char* url,
+ const char* title,
+ int visit_count,
+ int last_visit_ago,
+ int typed_count) {
+ URLRow row(GURL(url), 0);
+ row.set_title(UTF8ToUTF16(title));
+ row.set_visit_count(visit_count);
+ row.set_typed_count(typed_count);
+ row.set_last_visit(base::Time::NowFromSystemTime() -
+ base::TimeDelta::FromDays(last_visit_ago));
+ return row;
+ }
+
+ InMemoryURLIndex::String16Vector Make1Term(const char* term) {
+ InMemoryURLIndex::String16Vector terms;
+ terms.push_back(UTF8ToUTF16(term));
+ return terms;
+ }
+
+ InMemoryURLIndex::String16Vector Make2Terms(const char* term_1,
+ const char* term_2) {
+ InMemoryURLIndex::String16Vector terms;
+ terms.push_back(UTF8ToUTF16(term_1));
+ terms.push_back(UTF8ToUTF16(term_2));
+ return terms;
+ }
+
scoped_ptr<InMemoryURLIndex> url_index_;
};
+class LimitedInMemoryURLIndexTest : public InMemoryURLIndexTest {
+ protected:
+ FilePath::StringType TestDBName() const {
+ return FILE_PATH_LITERAL("url_history_provider_test_limited.db.txt");
+ }
+};
+
TEST_F(InMemoryURLIndexTest, Construction) {
url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))));
EXPECT_TRUE(url_index_.get());
}
-// TODO(mrossetti): Write python script to calculate the validation criteria.
-TEST_F(InMemoryURLIndexTest, Initialization) {
+TEST_F(LimitedInMemoryURLIndexTest, Initialization) {
// Verify that the database contains the expected number of items, which
// is the pre-filtered count, i.e. all of the items.
sql::Statement statement(GetDB().GetUniqueStatement("SELECT * FROM urls;"));
EXPECT_TRUE(statement);
uint64 row_count = 0;
while (statement.Step()) ++row_count;
- EXPECT_EQ(row_count, 33U);
+ EXPECT_EQ(1U, row_count);
url_index_.reset(new InMemoryURLIndex);
url_index_->Init(this, "en,ja,hi,zh");
- EXPECT_EQ(url_index_->history_item_count_, 28);
+ EXPECT_EQ(1, url_index_->history_item_count_);
// history_info_map_ should have the same number of items as were filtered.
- EXPECT_EQ(url_index_->history_info_map_.size(), 28U);
- EXPECT_EQ(url_index_->char_word_map_.size(), 37U);
- EXPECT_EQ(url_index_->word_map_.size(), 91U);
+ EXPECT_EQ(1U, url_index_->history_info_map_.size());
+ EXPECT_EQ(36U, url_index_->char_word_map_.size());
+ EXPECT_EQ(17U, url_index_->word_map_.size());
}
TEST_F(InMemoryURLIndexTest, Retrieval) {
@@ -134,24 +171,35 @@
// See if a very specific term gives a single result.
terms.push_back(ASCIIToUTF16("DrudgeReport"));
- EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U);
+ ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms);
+ EXPECT_EQ(1U, matches.size());
+ // Verify that we got back the result we expected.
+ EXPECT_EQ(5, matches[0].url_info.id());
+ EXPECT_EQ("http://drudgereport.com/", matches[0].url_info.url().spec());
+ EXPECT_EQ(ASCIIToUTF16("DRUDGE REPORT 2010"), matches[0].url_info.title());
+
// Search which should result in multiple results.
terms.clear();
terms.push_back(ASCIIToUTF16("drudge"));
- ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms);
- ASSERT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 2U);
+ matches = url_index_->HistoryItemsForTerms(terms);
+ ASSERT_EQ(2U, url_index_->HistoryItemsForTerms(terms).size());
// The results should be in descending score order.
EXPECT_GT(matches[0].raw_score, matches[1].raw_score);
// Search which should result in nearly perfect result.
terms.clear();
- terms.push_back(ASCIIToUTF16("http"));
+ terms.push_back(ASCIIToUTF16("https"));
terms.push_back(ASCIIToUTF16("NearlyPerfectResult"));
matches = url_index_->HistoryItemsForTerms(terms);
- ASSERT_EQ(matches.size(), 1U);
+ ASSERT_EQ(1U, matches.size());
// The results should have a very high score.
EXPECT_GT(matches[0].raw_score, 900);
+ EXPECT_EQ(32, matches[0].url_info.id());
+ EXPECT_EQ("https://nearlyperfectresult.com/",
+ matches[0].url_info.url().spec()); // Note: URL gets lowercased.
+ EXPECT_EQ(ASCIIToUTF16("Practically Perfect Search Result"),
+ matches[0].url_info.title());
// Search which should result in very poor result.
terms.clear();
@@ -159,11 +207,39 @@
terms.push_back(ASCIIToUTF16("y"));
terms.push_back(ASCIIToUTF16("x"));
matches = url_index_->HistoryItemsForTerms(terms);
- ASSERT_EQ(matches.size(), 1U);
+ ASSERT_EQ(1U, matches.size());
// The results should have a poor score.
EXPECT_LT(matches[0].raw_score, 200);
+ EXPECT_EQ(33, matches[0].url_info.id());
+ EXPECT_EQ("http://quiteuselesssearchresultxyz.com/",
+ matches[0].url_info.url().spec()); // Note: URL gets lowercased.
+ EXPECT_EQ(ASCIIToUTF16("Practically Useless Search Result"),
+ matches[0].url_info.title());
}
+TEST_F(InMemoryURLIndexTest, TitleSearch) {
+ url_index_.reset(new InMemoryURLIndex());
+ url_index_->Init(this, "en,ja,hi,zh");
+ // Signal if someone has changed the test DB.
+ EXPECT_EQ(28U, url_index_->history_info_map_.size());
+ InMemoryURLIndex::String16Vector terms;
+
+ // Ensure title is being searched.
+ terms.push_back(ASCIIToUTF16("MORTGAGE"));
+ terms.push_back(ASCIIToUTF16("RATE"));
+ terms.push_back(ASCIIToUTF16("DROPS"));
+ ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms);
+ EXPECT_EQ(1U, matches.size());
+
+ // Verify that we got back the result we expected.
+ EXPECT_EQ(1, matches[0].url_info.id());
+ EXPECT_EQ("http://www.reuters.com/article/idUSN0839880620100708",
+ matches[0].url_info.url().spec());
+ EXPECT_EQ(ASCIIToUTF16(
+ "UPDATE 1-US 30-yr mortgage rate drops to new record low | Reuters"),
+ matches[0].url_info.title());
+}
+
TEST_F(InMemoryURLIndexTest, Char16Utilities) {
string16 term = ASCIIToUTF16("drudgereport");
string16 expected = ASCIIToUTF16("drugepot");
@@ -179,6 +255,76 @@
EXPECT_EQ(*s_iter, *c_iter);
}
+TEST_F(InMemoryURLIndexTest, StaticFunctions) {
+ // Test WordVectorFromString16
+ string16 string_a(ASCIIToUTF16("http://www.google.com/ frammy the brammy"));
+ InMemoryURLIndex::String16Vector string_vec =
+ InMemoryURLIndex::WordVectorFromString16(string_a, false);
+ EXPECT_EQ(7U, string_vec.size());
+ // See if we got the words we expected.
+ EXPECT_EQ(UTF8ToUTF16("http"), string_vec[0]);
+ EXPECT_EQ(UTF8ToUTF16("www"), string_vec[1]);
+ EXPECT_EQ(UTF8ToUTF16("google"), string_vec[2]);
+ EXPECT_EQ(UTF8ToUTF16("com"), string_vec[3]);
+ EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[4]);
+ EXPECT_EQ(UTF8ToUTF16("the"), string_vec[5]);
+ EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[6]);
+
+ string_vec = InMemoryURLIndex::WordVectorFromString16(string_a, true);
+ EXPECT_EQ(5U, string_vec.size());
+ EXPECT_EQ(UTF8ToUTF16("http://"), string_vec[0]);
+ EXPECT_EQ(UTF8ToUTF16("www.google.com/"), string_vec[1]);
+ EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[2]);
+ EXPECT_EQ(UTF8ToUTF16("the"), string_vec[3]);
+ EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[4]);
+
+ // Test WordSetFromString16
+ string16 string_b(ASCIIToUTF16(
+ "http://web.google.com/search Google Web Search"));
+ InMemoryURLIndex::String16Set string_set =
+ InMemoryURLIndex::WordSetFromString16(string_b);
+ EXPECT_EQ(5U, string_set.size());
+ // See if we got the words we expected.
+ EXPECT_TRUE(string_set.find(UTF8ToUTF16("com")) != string_set.end());
+ EXPECT_TRUE(string_set.find(UTF8ToUTF16("google")) != string_set.end());
+ EXPECT_TRUE(string_set.find(UTF8ToUTF16("http")) != string_set.end());
+ EXPECT_TRUE(string_set.find(UTF8ToUTF16("search")) != string_set.end());
+ EXPECT_TRUE(string_set.find(UTF8ToUTF16("web")) != string_set.end());
+
+ // Test SortAndDeoverlap
+ TermMatches matches_a;
+ matches_a.push_back(TermMatch(1, 13, 10));
+ matches_a.push_back(TermMatch(2, 23, 10));
+ matches_a.push_back(TermMatch(3, 3, 10));
+ matches_a.push_back(TermMatch(4, 40, 5));
+ TermMatches matches_b = InMemoryURLIndex::SortAndDeoverlap(matches_a);
+ // Nothing should have been eliminated.
+ EXPECT_EQ(matches_a.size(), matches_b.size());
+ // The order should now be 3, 1, 2, 4.
+ EXPECT_EQ(3, matches_b[0].term_num);
+ EXPECT_EQ(1, matches_b[1].term_num);
+ EXPECT_EQ(2, matches_b[2].term_num);
+ EXPECT_EQ(4, matches_b[3].term_num);
+ matches_a.push_back(TermMatch(5, 18, 10));
+ matches_a.push_back(TermMatch(6, 38, 5));
+ matches_b = InMemoryURLIndex::SortAndDeoverlap(matches_a);
+ // Two matches should have been eliminated.
+ EXPECT_EQ(matches_a.size() - 2, matches_b.size());
+ // The order should now be 3, 1, 2, 6.
+ EXPECT_EQ(3, matches_b[0].term_num);
+ EXPECT_EQ(1, matches_b[1].term_num);
+ EXPECT_EQ(2, matches_b[2].term_num);
+ EXPECT_EQ(6, matches_b[3].term_num);
+
+ // Test MatchTermInString
+ TermMatches matches_c = InMemoryURLIndex::MatchTermInString(
+ UTF8ToUTF16("x"), UTF8ToUTF16("axbxcxdxex fxgx/hxixjx.kx"), 123);
+ ASSERT_EQ(11U, matches_c.size());
+ const size_t expected_offsets[] = { 1, 3, 5, 7, 9, 12, 14, 17, 19, 21, 24 };
+ for (int i = 0; i < 11; ++i)
+ EXPECT_EQ(expected_offsets[i], matches_c[i].offset);
+}
+
TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) {
// Verify that match results for previously typed characters are retained
// (in the term_char_word_set_cache_) and reused, if possible, in future
@@ -190,22 +336,22 @@
InMemoryURLIndex::String16Vector terms;
string16 term = ASCIIToUTF16("drudgerepo");
terms.push_back(term);
- EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U);
+ EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(terms).size());
{
// Exercise the term matching cache with the same term.
InMemoryURLIndex::Char16Vector uni_chars =
InMemoryURLIndex::Char16VectorFromString16(term);
EXPECT_EQ(uni_chars.size(), 7U); // Equivalent to 'degopru'
- EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars), 6U);
+ EXPECT_EQ(6U, url_index_->CachedResultsIndexForTerm(uni_chars));
}
{
// Back off a character.
InMemoryURLIndex::Char16Vector uni_chars =
InMemoryURLIndex::Char16VectorFromString16(ASCIIToUTF16("drudgerep"));
- EXPECT_EQ(uni_chars.size(), 6U); // Equivalent to 'degpru'
- EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars), 5U);
+ EXPECT_EQ(6U, uni_chars.size()); // Equivalent to 'degpru'
+ EXPECT_EQ(5U, url_index_->CachedResultsIndexForTerm(uni_chars));
}
{
@@ -213,20 +359,55 @@
InMemoryURLIndex::Char16Vector uni_chars =
InMemoryURLIndex::Char16VectorFromString16(
ASCIIToUTF16("drudgereporta"));
- EXPECT_EQ(uni_chars.size(), 9U); // Equivalent to 'adegoprtu'
- EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars), 6U);
+ EXPECT_EQ(9U, uni_chars.size()); // Equivalent to 'adegoprtu'
+ EXPECT_EQ(6U, url_index_->CachedResultsIndexForTerm(uni_chars));
}
{
// Use different string.
InMemoryURLIndex::Char16Vector uni_chars =
InMemoryURLIndex::Char16VectorFromString16(ASCIIToUTF16("abcde"));
- EXPECT_EQ(uni_chars.size(), 5U);
- EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars),
- static_cast<size_t>(-1));
+ EXPECT_EQ(5U, uni_chars.size());
+ EXPECT_EQ(static_cast<size_t>(-1),
+ url_index_->CachedResultsIndexForTerm(uni_chars));
}
}
+TEST_F(InMemoryURLIndexTest, Scoring) {
+ URLRow row_a(MakeURLRow("http://abcdef", "fedcba", 20, 0, 20));
+ // Test scores based on position.
+ ScoredHistoryMatch scored_a(
+ InMemoryURLIndex::ScoredMatchForURL(row_a, Make1Term("abc")));
+ ScoredHistoryMatch scored_b(
+ InMemoryURLIndex::ScoredMatchForURL(row_a, Make1Term("bcd")));
+ EXPECT_GT(scored_a.raw_score, scored_b.raw_score);
+ // Test scores based on length.
+ ScoredHistoryMatch scored_c(
+ InMemoryURLIndex::ScoredMatchForURL(row_a, Make1Term("abcd")));
+ EXPECT_LT(scored_a.raw_score, scored_c.raw_score);
+ // Test scores based on order.
+ ScoredHistoryMatch scored_d(
+ InMemoryURLIndex::ScoredMatchForURL(row_a, Make2Terms("abc", "def")));
+ ScoredHistoryMatch scored_e(
+ InMemoryURLIndex::ScoredMatchForURL(row_a, Make2Terms("def", "abc")));
+ EXPECT_GT(scored_d.raw_score, scored_e.raw_score);
+ // Test scores based on visit_count.
+ URLRow row_b(MakeURLRow("http://abcdef", "fedcba", 10, 0, 20));
+ ScoredHistoryMatch scored_f(
+ InMemoryURLIndex::ScoredMatchForURL(row_b, Make1Term("abc")));
+ EXPECT_GT(scored_a.raw_score, scored_f.raw_score);
+ // Test scores based on last_visit.
+ URLRow row_c(MakeURLRow("http://abcdef", "fedcba", 20, 2, 20));
+ ScoredHistoryMatch scored_g(
+ InMemoryURLIndex::ScoredMatchForURL(row_c, Make1Term("abc")));
+ EXPECT_GT(scored_a.raw_score, scored_g.raw_score);
+ // Test scores based on typed_count.
+ URLRow row_d(MakeURLRow("http://abcdef", "fedcba", 20, 0, 10));
+ ScoredHistoryMatch scored_h(
+ InMemoryURLIndex::ScoredMatchForURL(row_d, Make1Term("abc")));
+ EXPECT_GT(scored_a.raw_score, scored_h.raw_score);
+}
+
TEST_F(InMemoryURLIndexTest, AddNewRows) {
url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))));
url_index_->Init(this, "en,ja,hi,zh");
@@ -245,11 +426,11 @@
url_index_->UpdateURL(new_row_id, new_row);
// Verify that we can retrieve it.
- EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U);
+ EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(terms).size());
// Add it again just to be sure that is harmless.
url_index_->UpdateURL(new_row_id, new_row);
- EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U);
+ EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(terms).size());
}
TEST_F(InMemoryURLIndexTest, DeleteRows) {
@@ -260,7 +441,7 @@
// Make sure we actually get an existing result.
terms.push_back(ASCIIToUTF16("DrudgeReport"));
ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms);
- EXPECT_EQ(matches.size(), 1U);
+ ASSERT_EQ(1U, matches.size());
// Determine the row id for that result, delete that id, then search again.
url_index_->DeleteURL(matches[0].url_info.id());
@@ -311,7 +492,7 @@
// Clear and then prove it's clear.
url_index.ClearPrivateData();
- EXPECT_EQ(url_index.history_item_count_, 0);
+ EXPECT_EQ(0, url_index.history_item_count_);
EXPECT_TRUE(url_index.word_list_.empty());
EXPECT_TRUE(url_index.word_map_.empty());
EXPECT_TRUE(url_index.char_word_map_.empty());
« no previous file with comments | « chrome/browser/history/in_memory_url_index.cc ('k') | chrome/test/data/History/url_history_provider_test.db.txt » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698