Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1097)

Unified Diff: chrome/browser/history/in_memory_url_index_unittest.cc

Issue 8526010: Improve Autocomplete Matches and Handling of Large Results Sets (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: '' Created 9 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/history/in_memory_url_index_unittest.cc
===================================================================
--- chrome/browser/history/in_memory_url_index_unittest.cc (revision 110116)
+++ chrome/browser/history/in_memory_url_index_unittest.cc (working copy)
@@ -10,6 +10,7 @@
#include "base/string16.h"
#include "base/string_util.h"
#include "base/utf_string_conversions.h"
+#include "chrome/browser/autocomplete/autocomplete.h"
#include "chrome/browser/history/in_memory_database.h"
#include "chrome/browser/history/in_memory_url_index.h"
#include "chrome/browser/history/in_memory_url_index_types.h"
@@ -194,26 +195,6 @@
return FILE_PATH_LITERAL("url_history_provider_test_limited.db.txt");
}
-class ExpandedInMemoryURLIndexTest : public InMemoryURLIndexTest {
- protected:
- virtual void SetUp();
-};
-
-void ExpandedInMemoryURLIndexTest::SetUp() {
- InMemoryURLIndexTest::SetUp();
- // Add 600 more history items.
- // NOTE: Keep the string length constant at least the length of the format
- // string plus 5 to account for a 3 digit number and terminator.
- char url_format[] = "http://www.google.com/%d";
- const size_t kMaxLen = arraysize(url_format) + 5;
- char url_string[kMaxLen + 1];
- for (int i = 0; i < 600; ++i) {
- base::snprintf(url_string, kMaxLen, url_format, i);
- URLRow row(MakeURLRow(url_string, "Google Search", 20, 0, 20));
- AddURL(row);
- }
-}
-
TEST_F(InMemoryURLIndexTest, Construction) {
url_index_.reset(new InMemoryURLIndex(FilePath()));
EXPECT_TRUE(url_index_.get());
@@ -244,64 +225,84 @@
// See if a very specific term gives a single result.
ScoredHistoryMatches matches =
- url_index_->HistoryItemsForTerms(Make1Term("DrudgeReport"));
+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("DrudgeReport"));
Peter Kasting 2011/11/21 20:31:02 Nit: Can use ASCIIToUTF16() in most of these.
mrossetti 2011/11/21 21:38:25 Ah! Why did I forget that??? Thanks.
ASSERT_EQ(1U, matches.size());
// Verify that we got back the result we expected.
EXPECT_EQ(5, matches[0].url_info.id());
EXPECT_EQ("http://drudgereport.com/", matches[0].url_info.url().spec());
- EXPECT_EQ(ASCIIToUTF16("DRUDGE REPORT 2010"), matches[0].url_info.title());
+ EXPECT_EQ(UTF8ToUTF16("DRUDGE REPORT 2010"), matches[0].url_info.title());
// Search which should result in multiple results.
- matches = url_index_->HistoryItemsForTerms(Make1Term("drudge"));
+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("drudge"));
ASSERT_EQ(2U, matches.size());
// The results should be in descending score order.
EXPECT_GE(matches[0].raw_score, matches[1].raw_score);
// Search which should result in nearly perfect result.
- matches = url_index_->HistoryItemsForTerms(Make2Terms("https",
- "NearlyPerfectResult"));
+ matches = url_index_->HistoryItemsForTerms(
+ UTF8ToUTF16("https NearlyPerfectResult"));
ASSERT_EQ(1U, matches.size());
// The results should have a very high score.
EXPECT_GT(matches[0].raw_score, 900);
EXPECT_EQ(32, matches[0].url_info.id());
EXPECT_EQ("https://nearlyperfectresult.com/",
matches[0].url_info.url().spec()); // Note: URL gets lowercased.
- EXPECT_EQ(ASCIIToUTF16("Practically Perfect Search Result"),
+ EXPECT_EQ(UTF8ToUTF16("Practically Perfect Search Result"),
matches[0].url_info.title());
// Search which should result in very poor result.
- String16Vector original_terms;
- original_terms.push_back(ASCIIToUTF16("z"));
- original_terms.push_back(ASCIIToUTF16("y"));
- original_terms.push_back(ASCIIToUTF16("x"));
- matches = url_index_->HistoryItemsForTerms(original_terms);
+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("z y x"));
ASSERT_EQ(1U, matches.size());
// The results should have a poor score.
EXPECT_LT(matches[0].raw_score, 500);
EXPECT_EQ(33, matches[0].url_info.id());
EXPECT_EQ("http://quiteuselesssearchresultxyz.com/",
matches[0].url_info.url().spec()); // Note: URL gets lowercased.
- EXPECT_EQ(ASCIIToUTF16("Practically Useless Search Result"),
+ EXPECT_EQ(UTF8ToUTF16("Practically Useless Search Result"),
matches[0].url_info.title());
// Search which will match at the end of an URL with encoded characters.
- matches = url_index_->HistoryItemsForTerms(Make1Term("ice"));
+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("ice"));
ASSERT_EQ(1U, matches.size());
}
-TEST_F(ExpandedInMemoryURLIndexTest, ShortCircuit) {
+TEST_F(InMemoryURLIndexTest, ProperStringMatching) {
url_index_.reset(new InMemoryURLIndex(FilePath()));
url_index_->Init(this, "en,ja,hi,zh");
- // A search for 'w' should short-circuit and not return any matches.
+ // Search for the following with the expected results:
+ // "atdmt view" - found
+ // "atdmt.view" - not found
+ // "view.atdmt" - found
ScoredHistoryMatches matches =
- url_index_->HistoryItemsForTerms(Make1Term("w"));
- EXPECT_TRUE(matches.empty());
+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("atdmt view"));
+ ASSERT_EQ(1U, matches.size());
+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("atdmt.view"));
+ ASSERT_EQ(0U, matches.size());
+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("view.atdmt"));
+ ASSERT_EQ(1U, matches.size());
+}
- // A search for 'working' should not short-circuit.
- matches = url_index_->HistoryItemsForTerms(Make1Term("working"));
- EXPECT_EQ(1U, matches.size());
+TEST_F(InMemoryURLIndexTest, HugeResultSet) {
+ url_index_.reset(new InMemoryURLIndex(FilePath()));
+ url_index_->Init(this, "en,ja,hi,zh");
+
+ // Create a huge set of qualifying history items.
+ for (URLID row_id = 5000; row_id < 6000; ++row_id) {
+ URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"), row_id);
+ new_row.set_last_visit(base::Time::Now());
+ url_index_->UpdateURL(row_id, new_row);
+ }
+
+ ScoredHistoryMatches matches =
+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("b"));
+ ASSERT_EQ(AutocompleteProvider::kMaxMatches, matches.size());
+ // There are 7 matches already in the database.
+ ASSERT_EQ(1007U, url_index_->pre_filter_item_count);
+ ASSERT_EQ(500U, url_index_->post_filter_item_count);
+ ASSERT_EQ(AutocompleteProvider::kMaxMatches,
+ url_index_->post_scoring_item_count);
}
TEST_F(InMemoryURLIndexTest, TitleSearch) {
@@ -309,21 +310,17 @@
url_index_->Init(this, "en,ja,hi,zh");
// Signal if someone has changed the test DB.
EXPECT_EQ(27U, url_index_->private_data_->history_info_map_.size());
- String16Vector original_terms;
// Ensure title is being searched.
- original_terms.push_back(ASCIIToUTF16("MORTGAGE"));
- original_terms.push_back(ASCIIToUTF16("RATE"));
- original_terms.push_back(ASCIIToUTF16("DROPS"));
ScoredHistoryMatches matches =
- url_index_->HistoryItemsForTerms(original_terms);
+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("MORTGAGE RATE DROPS"));
ASSERT_EQ(1U, matches.size());
// Verify that we got back the result we expected.
EXPECT_EQ(1, matches[0].url_info.id());
EXPECT_EQ("http://www.reuters.com/article/idUSN0839880620100708",
matches[0].url_info.url().spec());
- EXPECT_EQ(ASCIIToUTF16(
+ EXPECT_EQ(UTF8ToUTF16(
"UPDATE 1-US 30-yr mortgage rate drops to new record low | Reuters"),
matches[0].url_info.title());
}
@@ -333,12 +330,8 @@
url_index_->Init(this, "en,ja,hi,zh");
// Verify current title terms retrieves desired item.
- String16Vector original_terms;
- original_terms.push_back(ASCIIToUTF16("lebronomics"));
- original_terms.push_back(ASCIIToUTF16("could"));
- original_terms.push_back(ASCIIToUTF16("high"));
- original_terms.push_back(ASCIIToUTF16("taxes"));
- original_terms.push_back(ASCIIToUTF16("influence"));
+ string16 original_terms =
+ UTF8ToUTF16("lebronomics could high taxes influence");
ScoredHistoryMatches matches =
url_index_->HistoryItemsForTerms(original_terms);
ASSERT_EQ(1U, matches.size());
@@ -348,24 +341,18 @@
EXPECT_EQ(expected_id, matches[0].url_info.id());
EXPECT_EQ("http://www.businessandmedia.org/articles/2010/20100708120415.aspx",
matches[0].url_info.url().spec());
- EXPECT_EQ(ASCIIToUTF16(
+ EXPECT_EQ(UTF8ToUTF16(
"LeBronomics: Could High Taxes Influence James' Team Decision?"),
matches[0].url_info.title());
URLRow old_row(matches[0].url_info);
// Verify new title terms retrieves nothing.
- String16Vector new_terms;
- new_terms.push_back(ASCIIToUTF16("does"));
- new_terms.push_back(ASCIIToUTF16("eat"));
- new_terms.push_back(ASCIIToUTF16("oats"));
- new_terms.push_back(ASCIIToUTF16("little"));
- new_terms.push_back(ASCIIToUTF16("lambs"));
- new_terms.push_back(ASCIIToUTF16("ivy"));
+ string16 new_terms = UTF8ToUTF16("does eat oats little lambs ivy");
matches = url_index_->HistoryItemsForTerms(new_terms);
ASSERT_EQ(0U, matches.size());
// Update the row.
- old_row.set_title(ASCIIToUTF16("Does eat oats and little lambs eat ivy"));
+ old_row.set_title(UTF8ToUTF16("Does eat oats and little lambs eat ivy"));
url_index_->UpdateURL(expected_id, old_row);
// Verify we get the row using the new terms but not the original terms.
@@ -383,25 +370,25 @@
// The presence of duplicate characters should succeed. Exercise by cycling
// through a string with several duplicate characters.
ScoredHistoryMatches matches =
- url_index_->HistoryItemsForTerms(Make1Term("ABRA"));
+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("ABRA"));
ASSERT_EQ(1U, matches.size());
EXPECT_EQ(28, matches[0].url_info.id());
EXPECT_EQ("http://www.ddj.com/windows/184416623",
matches[0].url_info.url().spec());
- matches = url_index_->HistoryItemsForTerms(Make1Term("ABRACAD"));
+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("ABRACAD"));
ASSERT_EQ(1U, matches.size());
EXPECT_EQ(28, matches[0].url_info.id());
- matches = url_index_->HistoryItemsForTerms(Make1Term("ABRACADABRA"));
+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("ABRACADABRA"));
ASSERT_EQ(1U, matches.size());
EXPECT_EQ(28, matches[0].url_info.id());
- matches = url_index_->HistoryItemsForTerms(Make1Term("ABRACADABR"));
+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("ABRACADABR"));
ASSERT_EQ(1U, matches.size());
EXPECT_EQ(28, matches[0].url_info.id());
- matches = url_index_->HistoryItemsForTerms(Make1Term("ABRACA"));
+ matches = url_index_->HistoryItemsForTerms(UTF8ToUTF16("ABRACA"));
ASSERT_EQ(1U, matches.size());
EXPECT_EQ(28, matches[0].url_info.id());
}
@@ -424,60 +411,44 @@
// Now simulate typing search terms into the omnibox and check the state of
// the cache as each item is 'typed'.
- // Simulate typing "r" giving "r" in the simulated omnibox. The results for
+ // Simulate typing "r" giving "r" in the simulated omnibox. The results for
// 'r' will not be cached because it is only 1 character long.
- String16Vector original_terms;
- string16 term_r = ASCIIToUTF16("r");
- original_terms.push_back(term_r);
- url_index_->HistoryItemsForTerms(original_terms);
+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("r"));
EXPECT_EQ(0U, cache.size());
// Simulate typing "re" giving "r re" in the simulated omnibox.
- string16 term_re = ASCIIToUTF16("re");
- original_terms.push_back(term_re);
// 're' should be cached at this point but not 'r' as it is a single
// character.
- ASSERT_EQ(2U, original_terms.size());
- url_index_->HistoryItemsForTerms(original_terms);
+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("r re"));
ASSERT_EQ(1U, cache.size());
- CheckTerm(cache, term_re);
+ CheckTerm(cache, UTF8ToUTF16("re"));
// Simulate typing "reco" giving "r re reco" in the simulated omnibox.
- string16 term_reco = ASCIIToUTF16("reco");
- original_terms.push_back(term_reco);
// 're' and 'reco' should be cached at this point but not 'r' as it is a
// single character.
- url_index_->HistoryItemsForTerms(original_terms);
+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("r re reco"));
ASSERT_EQ(2U, cache.size());
- CheckTerm(cache, term_re);
- CheckTerm(cache, term_reco);
+ CheckTerm(cache, UTF8ToUTF16("re"));
+ CheckTerm(cache, UTF8ToUTF16("reco"));
- original_terms.clear(); // Simulate pressing <ESC>.
-
// Simulate typing "mort".
- string16 term_mort = ASCIIToUTF16("mort");
- original_terms.push_back(term_mort);
// Since we now have only one search term, the cached results for 're' and
// 'reco' should be purged, giving us only 1 item in the cache (for 'mort').
- url_index_->HistoryItemsForTerms(original_terms);
+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("mort"));
ASSERT_EQ(1U, cache.size());
- CheckTerm(cache, term_mort);
+ CheckTerm(cache, UTF8ToUTF16("mort"));
// Simulate typing "reco" giving "mort reco" in the simulated omnibox.
- original_terms.push_back(term_reco);
- url_index_->HistoryItemsForTerms(original_terms);
+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("mort reco"));
ASSERT_EQ(2U, cache.size());
- CheckTerm(cache, term_mort);
- CheckTerm(cache, term_reco);
+ CheckTerm(cache, UTF8ToUTF16("mort"));
+ CheckTerm(cache, UTF8ToUTF16("reco"));
// Simulate a <DELETE> by removing the 'reco' and adding back the 'rec'.
- original_terms.resize(original_terms.size() - 1);
- string16 term_rec = ASCIIToUTF16("rec");
- original_terms.push_back(term_rec);
- url_index_->HistoryItemsForTerms(original_terms);
+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("mort rec"));
ASSERT_EQ(2U, cache.size());
- CheckTerm(cache, term_mort);
- CheckTerm(cache, term_rec);
+ CheckTerm(cache, UTF8ToUTF16("mort"));
+ CheckTerm(cache, UTF8ToUTF16("rec"));
}
TEST_F(InMemoryURLIndexTest, Scoring) {
@@ -518,14 +489,13 @@
TEST_F(InMemoryURLIndexTest, AddNewRows) {
url_index_.reset(new InMemoryURLIndex(FilePath()));
url_index_->Init(this, "en,ja,hi,zh");
- String16Vector original_terms;
// Verify that the row we're going to add does not already exist.
URLID new_row_id = 87654321;
// Newly created URLRows get a last_visit time of 'right now' so it should
// qualify as a quick result candidate.
- original_terms.push_back(ASCIIToUTF16("brokeandalone"));
- EXPECT_TRUE(url_index_->HistoryItemsForTerms(original_terms).empty());
+ EXPECT_TRUE(url_index_->HistoryItemsForTerms(
+ UTF8ToUTF16("brokeandalone")).empty());
// Add a new row.
URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"), new_row_id);
@@ -533,27 +503,28 @@
url_index_->UpdateURL(new_row_id, new_row);
// Verify that we can retrieve it.
- EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(original_terms).size());
+ EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(
+ UTF8ToUTF16("brokeandalone")).size());
// Add it again just to be sure that is harmless.
url_index_->UpdateURL(new_row_id, new_row);
- EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(original_terms).size());
+ EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(
+ UTF8ToUTF16("brokeandalone")).size());
}
TEST_F(InMemoryURLIndexTest, DeleteRows) {
url_index_.reset(new InMemoryURLIndex(FilePath()));
url_index_->Init(this, "en,ja,hi,zh");
- String16Vector original_terms;
// Make sure we actually get an existing result.
- original_terms.push_back(ASCIIToUTF16("DrudgeReport"));
ScoredHistoryMatches matches =
- url_index_->HistoryItemsForTerms(original_terms);
+ url_index_->HistoryItemsForTerms(UTF8ToUTF16("DrudgeReport"));
ASSERT_EQ(1U, matches.size());
// Determine the row id for that result, delete that id, then search again.
url_index_->DeleteURL(matches[0].url_info.id());
- EXPECT_TRUE(url_index_->HistoryItemsForTerms(original_terms).empty());
+ EXPECT_TRUE(url_index_->HistoryItemsForTerms(
+ UTF8ToUTF16("DrudgeReport")).empty());
}
TEST_F(InMemoryURLIndexTest, WhitelistedURLs) {

Powered by Google App Engine
This is Rietveld 408576698