chrome/browser/history/in_memory_url_index_unittest.cc - Issue 8526010: Improve Autocomplete Matches and Handling of Large Results Sets

Unified Diff: chrome/browser/history/in_memory_url_index_unittest.cc

Issue 8526010: Improve Autocomplete Matches and Handling of Large Results Sets (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: '' Created 9 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/browser/history/in_memory_url_index_unittest.cc

===================================================================

--- chrome/browser/history/in_memory_url_index_unittest.cc (revision 110116)

+++ chrome/browser/history/in_memory_url_index_unittest.cc (working copy)

@@ -10,6 +10,7 @@

#include "base/string16.h"

#include "base/string_util.h"

#include "base/utf_string_conversions.h"

+#include "chrome/browser/autocomplete/autocomplete.h"

#include "chrome/browser/history/in_memory_database.h"

#include "chrome/browser/history/in_memory_url_index.h"

#include "chrome/browser/history/in_memory_url_index_types.h"

@@ -194,26 +195,6 @@

return FILE_PATH_LITERAL("url_history_provider_test_limited.db.txt");

}

-class ExpandedInMemoryURLIndexTest : public InMemoryURLIndexTest {

- protected:

- virtual void SetUp();

-};

-void ExpandedInMemoryURLIndexTest::SetUp() {

- InMemoryURLIndexTest::SetUp();

- // Add 600 more history items.

- // NOTE: Keep the string length constant at least the length of the format

- // string plus 5 to account for a 3 digit number and terminator.

- char url_format[] = "http://www.google.com/%d";

- const size_t kMaxLen = arraysize(url_format) + 5;

- char url_string[kMaxLen + 1];

- for (int i = 0; i < 600; ++i) {

- base::snprintf(url_string, kMaxLen, url_format, i);

- URLRow row(MakeURLRow(url_string, "Google Search", 20, 0, 20));

- AddURL(row);

- }

TEST_F(InMemoryURLIndexTest, Construction) {

url_index_.reset(new InMemoryURLIndex(FilePath()));

EXPECT_TRUE(url_index_.get());

@@ -244,64 +225,84 @@

// See if a very specific term gives a single result.

ScoredHistoryMatches matches =

- url_index_->HistoryItemsForTerms(Make1Term("DrudgeReport"));

+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("DrudgeReport"));

Peter Kasting 2011/11/21 20:31:02 Nit: Can use ASCIIToUTF16() in most of these.

mrossetti 2011/11/21 21:38:25 Ah! Why did I forget that??? Thanks.

ASSERT_EQ(1U, matches.size());

// Verify that we got back the result we expected.

EXPECT_EQ(5, matches[0].url_info.id());

EXPECT_EQ("http://drudgereport.com/", matches[0].url_info.url().spec());

- EXPECT_EQ(ASCIIToUTF16("DRUDGE REPORT 2010"), matches[0].url_info.title());

+ EXPECT_EQ(UTF8ToUTF16("DRUDGE REPORT 2010"), matches[0].url_info.title());

// Search which should result in multiple results.

- matches = url_index_->HistoryItemsForTerms(Make1Term("drudge"));

+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("drudge"));

ASSERT_EQ(2U, matches.size());

// The results should be in descending score order.

EXPECT_GE(matches[0].raw_score, matches[1].raw_score);

// Search which should result in a nearly perfect result.

- matches = url_index_->HistoryItemsForTerms(Make2Terms("https",

- "NearlyPerfectResult"));

+ matches = url_index_->HistoryItemsForTerms(

+ UTF8ToUTF16("https NearlyPerfectResult"));

ASSERT_EQ(1U, matches.size());

// The results should have a very high score.

EXPECT_GT(matches[0].raw_score, 900);

EXPECT_EQ(32, matches[0].url_info.id());

EXPECT_EQ("https://nearlyperfectresult.com/",

matches[0].url_info.url().spec()); // Note: URL gets lowercased.

- EXPECT_EQ(ASCIIToUTF16("Practically Perfect Search Result"),

+ EXPECT_EQ(UTF8ToUTF16("Practically Perfect Search Result"),

matches[0].url_info.title());

// Search which should result in a very poor result.

- String16Vector original_terms;

- original_terms.push_back(ASCIIToUTF16("z"));

- original_terms.push_back(ASCIIToUTF16("y"));

- original_terms.push_back(ASCIIToUTF16("x"));

- matches = url_index_->HistoryItemsForTerms(original_terms);

+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("z y x"));

ASSERT_EQ(1U, matches.size());

// The results should have a poor score.

EXPECT_LT(matches[0].raw_score, 500);

EXPECT_EQ(33, matches[0].url_info.id());

EXPECT_EQ("http://quiteuselesssearchresultxyz.com/",

matches[0].url_info.url().spec()); // Note: URL gets lowercased.

- EXPECT_EQ(ASCIIToUTF16("Practically Useless Search Result"),

+ EXPECT_EQ(UTF8ToUTF16("Practically Useless Search Result"),

matches[0].url_info.title());

// Search which will match at the end of a URL with encoded characters.

- matches = url_index_->HistoryItemsForTerms(Make1Term("ice"));

+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("ice"));

ASSERT_EQ(1U, matches.size());

}

-TEST_F(ExpandedInMemoryURLIndexTest, ShortCircuit) {

+TEST_F(InMemoryURLIndexTest, ProperStringMatching) {

url_index_.reset(new InMemoryURLIndex(FilePath()));

url_index_->Init(this, "en,ja,hi,zh");

- // A search for 'w' should short-circuit and not return any matches.

+ // Search for the following with the expected results:

+ // "atdmt view" - found

+ // "atdmt.view" - not found

+ // "view.atdmt" - found

ScoredHistoryMatches matches =

- url_index_->HistoryItemsForTerms(Make1Term("w"));

- EXPECT_TRUE(matches.empty());

+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("atdmt view"));

+ ASSERT_EQ(1U, matches.size());

+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("atdmt.view"));

+ ASSERT_EQ(0U, matches.size());

+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("view.atdmt"));

+ ASSERT_EQ(1U, matches.size());

- // A search for 'working' should not short-circuit.

- matches = url_index_->HistoryItemsForTerms(Make1Term("working"));

- EXPECT_EQ(1U, matches.size());

+TEST_F(InMemoryURLIndexTest, HugeResultSet) {

+ url_index_.reset(new InMemoryURLIndex(FilePath()));

+ url_index_->Init(this, "en,ja,hi,zh");

+ // Create a huge set of qualifying history items.

+ for (URLID row_id = 5000; row_id < 6000; ++row_id) {

+ URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"), row_id);

+ new_row.set_last_visit(base::Time::Now());

+ url_index_->UpdateURL(row_id, new_row);

+ }

+ ScoredHistoryMatches matches =

+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("b"));

+ ASSERT_EQ(AutocompleteProvider::kMaxMatches, matches.size());

+ // There are 7 matches already in the database.

+ ASSERT_EQ(1007U, url_index_->pre_filter_item_count);

+ ASSERT_EQ(500U, url_index_->post_filter_item_count);

+ ASSERT_EQ(AutocompleteProvider::kMaxMatches,

+ url_index_->post_scoring_item_count);

}

TEST_F(InMemoryURLIndexTest, TitleSearch) {

@@ -309,21 +310,17 @@

url_index_->Init(this, "en,ja,hi,zh");

// Signal if someone has changed the test DB.

EXPECT_EQ(27U, url_index_->private_data_->history_info_map_.size());

- String16Vector original_terms;

// Ensure title is being searched.

- original_terms.push_back(ASCIIToUTF16("MORTGAGE"));

- original_terms.push_back(ASCIIToUTF16("RATE"));

- original_terms.push_back(ASCIIToUTF16("DROPS"));

ScoredHistoryMatches matches =

- url_index_->HistoryItemsForTerms(original_terms);

+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("MORTGAGE RATE DROPS"));

ASSERT_EQ(1U, matches.size());

// Verify that we got back the result we expected.

EXPECT_EQ(1, matches[0].url_info.id());

EXPECT_EQ("http://www.reuters.com/article/idUSN0839880620100708",

matches[0].url_info.url().spec());

- EXPECT_EQ(ASCIIToUTF16(

+ EXPECT_EQ(UTF8ToUTF16(

"UPDATE 1-US 30-yr mortgage rate drops to new record low | Reuters"),

matches[0].url_info.title());

}

@@ -333,12 +330,8 @@

url_index_->Init(this, "en,ja,hi,zh");

// Verify current title terms retrieves desired item.

- String16Vector original_terms;

- original_terms.push_back(ASCIIToUTF16("lebronomics"));

- original_terms.push_back(ASCIIToUTF16("could"));

- original_terms.push_back(ASCIIToUTF16("high"));

- original_terms.push_back(ASCIIToUTF16("taxes"));

- original_terms.push_back(ASCIIToUTF16("influence"));

+ string16 original_terms =

+ UTF8ToUTF16("lebronomics could high taxes influence");

ScoredHistoryMatches matches =

url_index_->HistoryItemsForTerms(original_terms);

ASSERT_EQ(1U, matches.size());

@@ -348,24 +341,18 @@

EXPECT_EQ(expected_id, matches[0].url_info.id());

EXPECT_EQ("http://www.businessandmedia.org/articles/2010/20100708120415.aspx",

matches[0].url_info.url().spec());

- EXPECT_EQ(ASCIIToUTF16(

+ EXPECT_EQ(UTF8ToUTF16(

"LeBronomics: Could High Taxes Influence James' Team Decision?"),

matches[0].url_info.title());

URLRow old_row(matches[0].url_info);

// Verify new title terms retrieves nothing.

- String16Vector new_terms;

- new_terms.push_back(ASCIIToUTF16("does"));

- new_terms.push_back(ASCIIToUTF16("eat"));

- new_terms.push_back(ASCIIToUTF16("oats"));

- new_terms.push_back(ASCIIToUTF16("little"));

- new_terms.push_back(ASCIIToUTF16("lambs"));

- new_terms.push_back(ASCIIToUTF16("ivy"));

+ string16 new_terms = UTF8ToUTF16("does eat oats little lambs ivy");

matches = url_index_->HistoryItemsForTerms(new_terms);

ASSERT_EQ(0U, matches.size());

// Update the row.

- old_row.set_title(ASCIIToUTF16("Does eat oats and little lambs eat ivy"));

+ old_row.set_title(UTF8ToUTF16("Does eat oats and little lambs eat ivy"));

url_index_->UpdateURL(expected_id, old_row);

// Verify we get the row using the new terms but not the original terms.

@@ -383,25 +370,25 @@

// The presence of duplicate characters should succeed. Exercise by cycling

// through a string with several duplicate characters.

ScoredHistoryMatches matches =

- url_index_->HistoryItemsForTerms(Make1Term("ABRA"));

+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("ABRA"));

ASSERT_EQ(1U, matches.size());

EXPECT_EQ(28, matches[0].url_info.id());

EXPECT_EQ("http://www.ddj.com/windows/184416623",

matches[0].url_info.url().spec());

- matches = url_index_->HistoryItemsForTerms(Make1Term("ABRACAD"));

+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("ABRACAD"));

ASSERT_EQ(1U, matches.size());

EXPECT_EQ(28, matches[0].url_info.id());

- matches = url_index_->HistoryItemsForTerms(Make1Term("ABRACADABRA"));

+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("ABRACADABRA"));

ASSERT_EQ(1U, matches.size());

EXPECT_EQ(28, matches[0].url_info.id());

- matches = url_index_->HistoryItemsForTerms(Make1Term("ABRACADABR"));

+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("ABRACADABR"));

ASSERT_EQ(1U, matches.size());

EXPECT_EQ(28, matches[0].url_info.id());

- matches = url_index_->HistoryItemsForTerms(Make1Term("ABRACA"));

+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("ABRACA"));

ASSERT_EQ(1U, matches.size());

EXPECT_EQ(28, matches[0].url_info.id());

}

@@ -424,60 +411,44 @@

// Now simulate typing search terms into the omnibox and check the state of

// the cache as each item is 'typed'.

- // Simulate typing "r" giving "r" in the simulated omnibox. The results for

+ // Simulate typing "r" giving "r" in the simulated omnibox. The results for

// 'r' will not be cached because it is only 1 character long.

- String16Vector original_terms;

- string16 term_r = ASCIIToUTF16("r");

- original_terms.push_back(term_r);

- url_index_->HistoryItemsForTerms(original_terms);

+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("r"));

EXPECT_EQ(0U, cache.size());

// Simulate typing "re" giving "r re" in the simulated omnibox.

- string16 term_re = ASCIIToUTF16("re");

- original_terms.push_back(term_re);

// 're' should be cached at this point but not 'r' as it is a single

// character.

- ASSERT_EQ(2U, original_terms.size());

- url_index_->HistoryItemsForTerms(original_terms);

+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("r re"));

ASSERT_EQ(1U, cache.size());

- CheckTerm(cache, term_re);

+ CheckTerm(cache, UTF8ToUTF16("re"));

// Simulate typing "reco" giving "r re reco" in the simulated omnibox.

- string16 term_reco = ASCIIToUTF16("reco");

- original_terms.push_back(term_reco);

// 're' and 'reco' should be cached at this point but not 'r' as it is a

// single character.

- url_index_->HistoryItemsForTerms(original_terms);

+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("r re reco"));

ASSERT_EQ(2U, cache.size());

- CheckTerm(cache, term_re);

- CheckTerm(cache, term_reco);

+ CheckTerm(cache, UTF8ToUTF16("re"));

+ CheckTerm(cache, UTF8ToUTF16("reco"));

- original_terms.clear(); // Simulate pressing <ESC>.

// Simulate typing "mort".

- string16 term_mort = ASCIIToUTF16("mort");

- original_terms.push_back(term_mort);

// Since we now have only one search term, the cached results for 're' and

// 'reco' should be purged, giving us only 1 item in the cache (for 'mort').

- url_index_->HistoryItemsForTerms(original_terms);

+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("mort"));

ASSERT_EQ(1U, cache.size());

- CheckTerm(cache, term_mort);

+ CheckTerm(cache, UTF8ToUTF16("mort"));

// Simulate typing "reco" giving "mort reco" in the simulated omnibox.

- original_terms.push_back(term_reco);

- url_index_->HistoryItemsForTerms(original_terms);

+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("mort reco"));

ASSERT_EQ(2U, cache.size());

- CheckTerm(cache, term_mort);

- CheckTerm(cache, term_reco);

+ CheckTerm(cache, UTF8ToUTF16("mort"));

+ CheckTerm(cache, UTF8ToUTF16("reco"));

// Simulate a <DELETE> by removing the 'reco' and adding back the 'rec'.

- original_terms.resize(original_terms.size() - 1);

- string16 term_rec = ASCIIToUTF16("rec");

- original_terms.push_back(term_rec);

- url_index_->HistoryItemsForTerms(original_terms);

+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("mort rec"));

ASSERT_EQ(2U, cache.size());

- CheckTerm(cache, term_mort);

- CheckTerm(cache, term_rec);

+ CheckTerm(cache, UTF8ToUTF16("mort"));

+ CheckTerm(cache, UTF8ToUTF16("rec"));

}

TEST_F(InMemoryURLIndexTest, Scoring) {

@@ -518,14 +489,13 @@

TEST_F(InMemoryURLIndexTest, AddNewRows) {

url_index_.reset(new InMemoryURLIndex(FilePath()));

url_index_->Init(this, "en,ja,hi,zh");

- String16Vector original_terms;

// Verify that the row we're going to add does not already exist.

URLID new_row_id = 87654321;

// Newly created URLRows get a last_visit time of 'right now' so it should

// qualify as a quick result candidate.

- original_terms.push_back(ASCIIToUTF16("brokeandalone"));

- EXPECT_TRUE(url_index_->HistoryItemsForTerms(original_terms).empty());

+ EXPECT_TRUE(url_index_->HistoryItemsForTerms(

+ UTF8ToUTF16("brokeandalone")).empty());

// Add a new row.

URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"), new_row_id);

@@ -533,27 +503,28 @@

url_index_->UpdateURL(new_row_id, new_row);

// Verify that we can retrieve it.

- EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(original_terms).size());

+ EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(

+ UTF8ToUTF16("brokeandalone")).size());

// Add it again just to be sure that is harmless.

url_index_->UpdateURL(new_row_id, new_row);

- EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(original_terms).size());

+ EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(

+ UTF8ToUTF16("brokeandalone")).size());

}

TEST_F(InMemoryURLIndexTest, DeleteRows) {

url_index_.reset(new InMemoryURLIndex(FilePath()));

url_index_->Init(this, "en,ja,hi,zh");

- String16Vector original_terms;

// Make sure we actually get an existing result.

- original_terms.push_back(ASCIIToUTF16("DrudgeReport"));

ScoredHistoryMatches matches =

- url_index_->HistoryItemsForTerms(original_terms);

+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("DrudgeReport"));

ASSERT_EQ(1U, matches.size());

// Determine the row id for that result, delete that id, then search again.

url_index_->DeleteURL(matches[0].url_info.id());

- EXPECT_TRUE(url_index_->HistoryItemsForTerms(original_terms).empty());

+ EXPECT_TRUE(url_index_->HistoryItemsForTerms(

+ UTF8ToUTF16("DrudgeReport")).empty());

}

TEST_F(InMemoryURLIndexTest, WhitelistedURLs) {

« chrome/browser/history/in_memory_url_index.cc ('K') | « chrome/browser/history/in_memory_url_index_types.h ('k') | no next file » | no next file with comments »