| Index: trunk/src/chrome/browser/history/history_backend.cc
|
| ===================================================================
|
| --- trunk/src/chrome/browser/history/history_backend.cc (revision 212472)
|
| +++ trunk/src/chrome/browser/history/history_backend.cc (working copy)
|
| @@ -14,7 +14,6 @@
|
| #include "base/basictypes.h"
|
| #include "base/bind.h"
|
| #include "base/compiler_specific.h"
|
| -#include "base/files/file_enumerator.h"
|
| #include "base/memory/scoped_ptr.h"
|
| #include "base/memory/scoped_vector.h"
|
| #include "base/message_loop/message_loop.h"
|
| @@ -69,6 +68,10 @@
|
|
|
| (this does not store visit segments as they expire after 3 mos.)
|
|
|
| + TextDatabaseManager (manages multiple text databases for different times)
|
| + TextDatabase (represents a single month of full-text index).
|
| + ...more TextDatabase objects...
|
| +
|
| ExpireHistoryBackend (manages moving things from HistoryDatabase to
|
| the ArchivedDatabase and deleting)
|
| */
|
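The hierarchy note above has an on-disk counterpart: the full-text index is partitioned by month, one TextDatabase (a single SQLite file, judging by the sql::Connection::Delete() call in the cleanup code removed further down) per month, matching the "History Index *" filename pattern that code enumerates. A minimal sketch of the naming scheme; only the "History Index " prefix is confirmed by this patch, and the "YYYY-MM" suffix is an assumption for illustration:

    #include <cstdio>
    #include <string>

    // Hypothetical helper: one index file per month. The real naming code
    // lives in TextDatabase; only the "History Index " prefix appears in
    // this patch.
    std::string IndexFileNameForMonth(int year, int month) {
      char buf[32];
      std::snprintf(buf, sizeof(buf), "History Index %04d-%02d", year, month);
      return std::string(buf);
    }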
| @@ -165,6 +168,53 @@
|
| scoped_refptr<HistoryBackend> history_backend_;
|
| };
|
|
|
| +// Handles querying first the main database, then the archived database if
|
| +// that fails. It will optionally keep track of all URLs seen so duplicates
|
| +// can be eliminated. This is used by the querying sub-functions.
|
| +//
|
| +// TODO(brettw): This class could probably be simplified or eliminated. Since
|
| +// this was written, QueryResults has gained efficient lookup by URL, so the
|
| +// extra set of previously queried URLs here is less important.
|
| +class HistoryBackend::URLQuerier {
|
| + public:
|
| + URLQuerier(URLDatabase* main_db, URLDatabase* archived_db, bool track_unique)
|
| + : main_db_(main_db),
|
| + archived_db_(archived_db),
|
| + track_unique_(track_unique) {
|
| + }
|
| +
|
| + // Returns true if this URL has been previously queried. Only call when
|
| + // tracking unique URLs.
|
| + bool HasURL(const GURL& url) {
|
| + DCHECK(track_unique_);
|
| + return unique_urls_.find(url) != unique_urls_.end();
|
| + }
|
| +
|
| + bool GetRowForURL(const GURL& url, URLRow* row) {
|
| + if (!main_db_->GetRowForURL(url, row)) {
|
| + if (!archived_db_ || !archived_db_->GetRowForURL(url, row)) {
|
| + // The URL is in neither the main nor the archived DB.
|
| + return false;
|
| + }
|
| + }
|
| +
|
| + if (track_unique_)
|
| + unique_urls_.insert(url);
|
| + return true;
|
| + }
|
| +
|
| + private:
|
| + URLDatabase* main_db_; // Guaranteed non-NULL.
|
| + URLDatabase* archived_db_; // Possibly NULL.
|
| +
|
| + bool track_unique_;
|
| +
|
| + // When track_unique_ is set, this is updated with every URL seen so far.
|
| + std::set<GURL> unique_urls_;
|
| +
|
| + DISALLOW_COPY_AND_ASSIGN(URLQuerier);
|
| +};
|
| +
|
| // HistoryBackend --------------------------------------------------------------
|
|
|
| HistoryBackend::HistoryBackend(const base::FilePath& history_dir,
|
| @@ -532,7 +582,7 @@
|
| }
|
|
|
| // Last, save this redirect chain for later so we can set titles & favicons
|
| - // on the redirected pages properly.
|
| + // on the redirected pages properly. It is indexed by the destination page.
|
| recent_redirects_.Put(request.url, redirects);
|
| }
|
|
|
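Indexing by destination is what makes the later title and favicon fix-ups work: given only the URL the navigation ended on, the backend can recover every page in the chain. A minimal sketch of the mapping, with a plain std::map standing in for whatever cache type the Put() call above implies (the element type is assumed):

    #include <map>
    #include <string>
    #include <vector>

    // Simplified stand-in: the real recent_redirects_ maps a destination
    // URL to the full redirect chain; Put() suggests an MRU-style cache
    // rather than a plain map.
    typedef std::vector<std::string> RedirectChain;
    std::map<std::string, RedirectChain> recent_redirects;

    // After a navigation A -> B -> C commits, the chain {A, B, C} is stored
    // under C, so a later SetPageTitle(C, ...) can retitle A and B as well.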
| @@ -550,6 +600,11 @@
|
| last_ids.second);
|
| }
|
|
|
| + if (text_database_) {
|
| + text_database_->AddPageURL(request.url, last_ids.first, last_ids.second,
|
| + request.time);
|
| + }
|
| +
|
| ScheduleCommit();
|
| }
|
|
|
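This AddPageURL() call is the first of three FTS write entry points this patch wires up; the title and body text arrive later and are presumably matched to this URL and visit inside TextDatabaseManager. A sketch of the full sequence as it appears across this patch (variable names here are illustrative):

    // 1. The navigation commits (AddPage, above):
    text_database_->AddPageURL(url, url_id, visit_id, visit_time);
    // 2. The title becomes known (SetPageTitle, below):
    text_database_->AddPageTitle(url, title);
    // 3. The renderer delivers the body text (SetPageContents, below):
    text_database_->AddPageContents(url, body_text);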
| @@ -562,14 +617,12 @@
|
|
|
| TimeTicks beginning_time = TimeTicks::Now();
|
|
|
| - // Compute the file names.
|
| + // Compute the file names. Note that the index file can be removed when the
|
| + // text db manager is finished being hooked up.
|
| base::FilePath history_name = history_dir_.Append(chrome::kHistoryFilename);
|
| base::FilePath thumbnail_name = GetThumbnailFileName();
|
| base::FilePath archived_name = GetArchivedFileName();
|
|
|
| - // Delete the old index database files which are no longer used.
|
| - DeleteFTSIndexDatabases();
|
| -
|
| // History database.
|
| db_.reset(new HistoryDatabase());
|
|
|
| @@ -609,8 +662,8 @@
|
| delete mem_backend; // Error case, run without the in-memory DB.
|
| db_->BeginExclusiveMode(); // Must be after the mem backend read the data.
|
|
|
| - // Create the history publisher which needs to be passed on to the thumbnail
|
| - // database for publishing history.
|
| + // Create the history publisher which needs to be passed on to the text and
|
| + // thumbnail databases for publishing history.
|
| history_publisher_.reset(new HistoryPublisher());
|
| if (!history_publisher_->Init()) {
|
| // The init may fail when there are no indexers wanting our history.
|
| @@ -618,6 +671,22 @@
|
| history_publisher_.reset();
|
| }
|
|
|
| + // Full-text database. This has to be first so we can pass it to the
|
| + // HistoryDatabase for migration.
|
| + text_database_.reset(new TextDatabaseManager(history_dir_,
|
| + db_.get(), db_.get()));
|
| + if (!text_database_->Init(history_publisher_.get())) {
|
| + LOG(WARNING) << "Text database initialization failed, running without it.";
|
| + text_database_.reset();
|
| + }
|
| + if (db_->needs_version_17_migration()) {
|
| + // See needs_version_17_migration() decl for more. In this case, we want
|
| + // to erase all the text database files. This must be done after the text
|
| + // database manager has been initialized, since it knows about all the
|
| + // files it manages.
|
| + text_database_->DeleteAll();
|
| + }
|
| +
|
| // Thumbnail database.
|
| thumbnail_db_.reset(new ThumbnailDatabase());
|
| if (!db_->GetNeedsThumbnailMigration()) {
|
| @@ -670,7 +739,7 @@
|
| // The main DB initialization should intuitively be first (not that it
|
| // actually matters) and the expirer should be set last.
|
| expirer_.SetDatabases(db_.get(), archived_db_.get(),
|
| - thumbnail_db_.get());
|
| + thumbnail_db_.get(), text_database_.get());
|
|
|
| // Open the long-running transaction.
|
| db_->BeginTransaction();
|
| @@ -678,6 +747,8 @@
|
| thumbnail_db_->BeginTransaction();
|
| if (archived_db_)
|
| archived_db_->BeginTransaction();
|
| + if (text_database_)
|
| + text_database_->BeginTransaction();
|
|
|
| // Get the first item in our database.
|
| db_->GetStartDate(&first_recorded_time_);
|
| @@ -723,6 +794,10 @@
|
| archived_db_->CommitTransaction();
|
| archived_db_.reset();
|
| }
|
| + if (text_database_) {
|
| + text_database_->CommitTransaction();
|
| + text_database_.reset();
|
| + }
|
| }
|
|
|
| std::pair<URLID, VisitID> HistoryBackend::AddPageVisit(
|
| @@ -786,6 +861,14 @@
|
| return std::make_pair(0, 0);
|
| }
|
| url_info.id_ = url_id;
|
| +
|
| + // We don't actually add the URL to the full text index at this point. It
|
| + // might be nice to do this so that even if we get no title or body, the
|
| + // user can search for URL components and get the page.
|
| + //
|
| + // However, in most cases, we'll get at least a title and usually contents,
|
| + // and this add will be redundant, slowing everything down. As a result,
|
| + // we ignore this edge case.
|
| }
|
|
|
| // Add the visit with the time to the database.
|
| @@ -855,6 +938,26 @@
|
| }
|
| }
|
|
|
| + // Add the page to the full text index. This function is also used for
|
| + // importing. Even though we don't have page contents, we can at least
|
| + // add the title and URL to the index so they can be searched. We don't
|
| + // bother to delete any already-existing FTS entries for the URL, since
|
| + // this is normally called on import.
|
| + //
|
| + // If you ever import *after* first run (selecting import from the menu),
|
| + // then these additional entries will "shadow" the originals when querying
|
| + // for the most recent match only, and the user won't get snippets. This is
|
| + // a very minor issue, and fixing it will make import slower, so we don't
|
| + // bother.
|
| + bool has_indexed = false;
|
| + if (text_database_) {
|
| + // We do not have to make it update the visit database; below, we will
|
| + // create the visit entry with the indexed flag set.
|
| + has_indexed = text_database_->AddPageData(i->url(), url_id, 0,
|
| + i->last_visit(),
|
| + i->title(), string16());
|
| + }
|
| +
|
| // Sync code manages the visits itself.
|
| if (visit_source != SOURCE_SYNCED) {
|
| // Make up a visit to correspond to the last visit to the page.
|
| @@ -863,6 +966,7 @@
|
| content::PAGE_TRANSITION_LINK |
|
| content::PAGE_TRANSITION_CHAIN_START |
|
| content::PAGE_TRANSITION_CHAIN_END), 0);
|
| + visit_info.is_indexed = has_indexed;
|
| if (!visit_database->AddVisit(&visit_info, visit_source)) {
|
| NOTREACHED() << "Adding visit failed.";
|
| return;
|
| @@ -897,6 +1001,10 @@
|
| if (!db_)
|
| return;
|
|
|
| + // Update the full text index.
|
| + if (text_database_)
|
| + text_database_->AddPageTitle(url, title);
|
| +
|
| // Search for recent redirects which should get the same title. We make a
|
| // dummy list containing the exact URL visited if there are no redirects so
|
| // the processing below can be the same.
|
| @@ -1391,6 +1499,59 @@
|
| result->set_reached_beginning(true);
|
| }
|
|
|
| +void HistoryBackend::QueryHistoryFTS(const string16& text_query,
|
| + const QueryOptions& options,
|
| + QueryResults* result) {
|
| + if (!text_database_)
|
| + return;
|
| +
|
| + // Full-text query: first get all the FTS results in the time range.
|
| + std::vector<TextDatabase::Match> fts_matches;
|
| + Time first_time_searched;
|
| + text_database_->GetTextMatches(text_query, options,
|
| + &fts_matches, &first_time_searched);
|
| +
|
| + URLQuerier querier(db_.get(), archived_db_.get(), true);
|
| +
|
| + // Now get the row and visit information for each one.
|
| + URLResult url_result; // Declare outside loop to prevent re-construction.
|
| + for (size_t i = 0; i < fts_matches.size(); i++) {
|
| + if (options.max_count != 0 &&
|
| + static_cast<int>(result->size()) >= options.max_count)
|
| + break; // Got too many items.
|
| +
|
| + // Get the URL, querying the main and archived databases as necessary. If
|
| + // it is not found, the history and full-text search databases are out of
|
| + // sync and we skip this result.
|
| + if (!querier.GetRowForURL(fts_matches[i].url, &url_result))
|
| + continue;
|
| +
|
| + if (!url_result.url().is_valid())
|
| + continue; // Don't report invalid URLs in case of corruption.
|
| +
|
| + // Copy over the FTS stuff that the URLDatabase doesn't know about.
|
| + // We do this with swap() to avoid copying, since we know we don't
|
| + // need the original any more. Note that we override the title with the
|
| + // one from FTS, since that will match the title_match_positions (the
|
| + // FTS title and the history DB title may differ).
|
| + url_result.set_title(fts_matches[i].title);
|
| + url_result.title_match_positions_.swap(
|
| + fts_matches[i].title_match_positions);
|
| + url_result.snippet_.Swap(&fts_matches[i].snippet);
|
| +
|
| + // The visit time also comes from the full text search database. Since it
|
| + // has the time, we can avoid an extra query of the visits table.
|
| + url_result.set_visit_time(fts_matches[i].time);
|
| +
|
| + // Add it to the vector; this will clear our |url_result| object as a
|
| + // result of the swap.
|
| + result->AppendURLBySwapping(&url_result);
|
| + }
|
| +
|
| + if (first_time_searched <= first_recorded_time_)
|
| + result->set_reached_beginning(true);
|
| +}
|
| +
|
| // Frontend to GetMostRecentRedirectsFrom from the history thread.
|
| void HistoryBackend::QueryRedirectsFrom(
|
| scoped_refptr<QueryRedirectsRequest> request,
|
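The reads and swaps in QueryHistoryFTS() above pin down the shape of TextDatabase::Match fairly well. A sketch inferred purely from that usage; the authoritative declaration is in text_database.h and may differ:

    // Every member below is read or swapped by QueryHistoryFTS().
    struct Match {
      GURL url;        // Page that matched; resolved via URLQuerier.
      string16 title;  // Title as stored in the FTS index. It overrides the
                       // history DB title so title_match_positions line up.
      base::Time time; // Visit time from the index; saves a visits query.
      Snippet::MatchPositions title_match_positions;
      Snippet snippet; // Highlighted excerpt of the page body.
    };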
| @@ -1650,6 +1811,14 @@
|
| provider->ExecuteWithDB(this, db_.get(), params);
|
| }
|
|
|
| +void HistoryBackend::SetPageContents(const GURL& url,
|
| + const string16& contents) {
|
| + // This is histogrammed in the text database manager.
|
| + if (!text_database_)
|
| + return;
|
| + text_database_->AddPageContents(url, contents);
|
| +}
|
| +
|
| void HistoryBackend::SetPageThumbnail(
|
| const GURL& url,
|
| const gfx::Image* thumbnail,
|
| @@ -1733,23 +1902,6 @@
|
| }
|
| }
|
|
|
| -void HistoryBackend::DeleteFTSIndexDatabases() {
|
| - // Find files on disk matching the text databases file pattern so we can
|
| - // quickly test for and delete them.
|
| - base::FilePath::StringType filepattern =
|
| - FILE_PATH_LITERAL("History Index *");
|
| - base::FileEnumerator enumerator(
|
| - history_dir_, false, base::FileEnumerator::FILES, filepattern);
|
| - int num_databases_deleted = 0;
|
| - base::FilePath current_file;
|
| - while (!(current_file = enumerator.Next()).empty()) {
|
| - if (sql::Connection::Delete(current_file))
|
| - num_databases_deleted++;
|
| - }
|
| - UMA_HISTOGRAM_COUNTS("History.DeleteFTSIndexDatabases",
|
| - num_databases_deleted);
|
| -}
|
| -
|
| bool HistoryBackend::GetThumbnailFromOlderRedirect(
|
| const GURL& page_url,
|
| std::vector<unsigned char>* data) {
|
| @@ -2516,6 +2668,11 @@
|
| archived_db_->CommitTransaction();
|
| archived_db_->BeginTransaction();
|
| }
|
| +
|
| + if (text_database_) {
|
| + text_database_->CommitTransaction();
|
| + text_database_->BeginTransaction();
|
| + }
|
| }
|
|
|
| void HistoryBackend::ScheduleCommit() {
|
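The new text-database branch follows the same commit-and-reopen idiom as the other databases: each one keeps a long-running transaction open at all times, so a flush is a commit immediately followed by a new begin. A generic sketch of the pattern, assuming only the two method names used above:

    // Flush pending work without ever leaving the database outside its
    // long-running transaction.
    template <typename DB>
    void FlushLongRunningTransaction(DB* db) {
      if (!db)
        return;  // The database may have failed to initialize.
      db->CommitTransaction();
      db->BeginTransaction();
    }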
| @@ -2746,7 +2903,7 @@
|
|
|
| // The expirer keeps tabs on the active databases. Tell it about the
|
| // databases which will be closed.
|
| - expirer_.SetDatabases(NULL, NULL, NULL);
|
| + expirer_.SetDatabases(NULL, NULL, NULL, NULL);
|
|
|
| // Reopen a new transaction for |db_| for the sake of CloseAllDatabases().
|
| db_->BeginTransaction();
|
| @@ -2836,7 +2993,15 @@
|
| LOG(ERROR) << "Main history could not be cleared";
|
| kept_urls.clear();
|
|
|
| - // Delete archived history.
|
| + // Delete FTS files & archived history.
|
| + if (text_database_) {
|
| + // We assume that the text database has one transaction open on it that we
|
| + // need to commit & restart (the long-running history transaction).
|
| + text_database_->CommitTransaction();
|
| + text_database_->DeleteAll();
|
| + text_database_->BeginTransaction();
|
| + }
|
| +
|
| if (archived_db_) {
|
| // Close the database and delete the file.
|
| archived_db_.reset();
|
|
|