| Index: trunk/src/chrome/browser/history/history_backend.cc
|
| ===================================================================
|
| --- trunk/src/chrome/browser/history/history_backend.cc (revision 212472)
|
| +++ trunk/src/chrome/browser/history/history_backend.cc (working copy)
|
| @@ -14,7 +14,6 @@
|
| #include "base/basictypes.h"
|
| #include "base/bind.h"
|
| #include "base/compiler_specific.h"
|
| -#include "base/files/file_enumerator.h"
|
| #include "base/memory/scoped_ptr.h"
|
| #include "base/memory/scoped_vector.h"
|
| #include "base/message_loop/message_loop.h"
|
| @@ -69,6 +68,10 @@
|
|
|
| (this does not store visit segments as they expire after 3 mos.)
|
|
|
| + TextDatabaseManager (manages multiple text databases for different times)
|
| + TextDatabase (represents a single month of full-text index).
|
| + ...more TextDatabase objects...
|
| +
|
| ExpireHistoryBackend (manages moving things from HistoryDatabase to
|
| the ArchivedDatabase and deleting)
|
| */
|
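The hierarchy note above has an on-disk counterpart: the full-text index is partitioned by month, one TextDatabase (a single SQLite file, judging by the sql::Connection::Delete() call in the cleanup code removed further down) per month, matching the "History Index *" filename pattern that code enumerates. A minimal sketch of the naming scheme; only the "History Index " prefix is confirmed by this patch, and the "YYYY-MM" suffix is an assumption for illustration:

    #include <cstdio>
    #include <string>

    // Hypothetical helper: one index file per month. The real naming code
    // lives in TextDatabase; only the "History Index " prefix appears in
    // this patch.
    std::string IndexFileNameForMonth(int year, int month) {
      char buf[32];
      std::snprintf(buf, sizeof(buf), "History Index %04d-%02d", year, month);
      return std::string(buf);
    }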
| @@ -165,6 +168,53 @@
|
| scoped_refptr<HistoryBackend> history_backend_;
|
| };
|
|
|
| +// Handles querying first the main database, then the archived database if
|
| +// that fails. It will optionally keep track of all URLs seen so duplicates
|
| +// can be eliminated. This is used by the querying sub-functions.
|
| +//
|
| +// TODO(brettw): This class could probably be simplified or eliminated. Since
|
| +// this was written, QueryResults has gained efficient lookup by URL, so the
|
| +// extra set of previously queried URLs here is less important.
|
| +class HistoryBackend::URLQuerier {
|
| + public:
|
| + URLQuerier(URLDatabase* main_db, URLDatabase* archived_db, bool track_unique)
|
| + : main_db_(main_db),
|
| + archived_db_(archived_db),
|
| + track_unique_(track_unique) {
|
| + }
|
| +
|
| + // Returns true if this URL has been previously queried. Only call when
|
| + // tracking unique URLs.
|
| + bool HasURL(const GURL& url) {
|
| + DCHECK(track_unique_);
|
| + return unique_urls_.find(url) != unique_urls_.end();
|
| + }
|
| +
|
| + bool GetRowForURL(const GURL& url, URLRow* row) {
|
| + if (!main_db_->GetRowForURL(url, row)) {
|
| + if (!archived_db_ || !archived_db_->GetRowForURL(url, row)) {
|
| + // The URL is in neither the main nor the archived DB.
|
| + return false;
|
| + }
|
| + }
|
| +
|
| + if (track_unique_)
|
| + unique_urls_.insert(url);
|
| + return true;
|
| + }
|
| +
|
| + private:
|
| + URLDatabase* main_db_; // Guaranteed non-NULL.
|
| + URLDatabase* archived_db_; // Possibly NULL.
|
| +
|
| + bool track_unique_;
|
| +
|
| + // When track_unique_ is set, this is updated with every URL seen so far.
|
| + std::set<GURL> unique_urls_;
|
| +
|
| + DISALLOW_COPY_AND_ASSIGN(URLQuerier);
|
| +};
|
| +
|
| // HistoryBackend --------------------------------------------------------------
|
|
|
| HistoryBackend::HistoryBackend(const base::FilePath& history_dir,
|
| @@ -532,7 +582,7 @@
|
| }
|
|
|
| // Last, save this redirect chain for later so we can set titles & favicons
|
| - // on the redirected pages properly.
|
| + // on the redirected pages properly. It is indexed by the destination page.
|
| recent_redirects_.Put(request.url, redirects);
|
| }
|
|
|
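Indexing by destination is what makes the later title and favicon fix-ups work: given only the URL the navigation ended on, the backend can recover every page in the chain. A minimal sketch of the mapping, with a plain std::map standing in for whatever cache type the Put() call above implies (the element type is assumed):

    #include <map>
    #include <string>
    #include <vector>

    // Simplified stand-in: the real recent_redirects_ maps a destination
    // URL to the full redirect chain; Put() suggests an MRU-style cache
    // rather than a plain map.
    typedef std::vector<std::string> RedirectChain;
    std::map<std::string, RedirectChain> recent_redirects;

    // After a navigation A -> B -> C commits, the chain {A, B, C} is stored
    // under C, so a later SetPageTitle(C, ...) can retitle A and B as well.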
| @@ -550,6 +600,11 @@
|
| last_ids.second);
|
| }
|
|
|
| + if (text_database_) {
|
| + text_database_->AddPageURL(request.url, last_ids.first, last_ids.second,
|
| + request.time);
|
| + }
|
| +
|
| ScheduleCommit();
|
| }
|
|
|
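This AddPageURL() call is the first of three FTS write entry points this patch wires up; the title and body text arrive later and are presumably matched to this URL and visit inside TextDatabaseManager. A sketch of the full sequence as it appears across this patch (variable names here are illustrative):

    // 1. The navigation commits (AddPage, above):
    text_database_->AddPageURL(url, url_id, visit_id, visit_time);
    // 2. The title becomes known (SetPageTitle, below):
    text_database_->AddPageTitle(url, title);
    // 3. The renderer delivers the body text (SetPageContents, below):
    text_database_->AddPageContents(url, body_text);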
| @@ -562,14 +617,12 @@
|
|
|
| TimeTicks beginning_time = TimeTicks::Now();
|
|
|
| - // Compute the file names.
|
| + // Compute the file names. Note that the index file can be removed when the
|
| + // text db manager is finished being hooked up.
|
| base::FilePath history_name = history_dir_.Append(chrome::kHistoryFilename);
|
| base::FilePath thumbnail_name = GetThumbnailFileName();
|
| base::FilePath archived_name = GetArchivedFileName();
|
|
|
| - // Delete the old index database files which are no longer used.
|
| - DeleteFTSIndexDatabases();
|
| -
|
| // History database.
|
| db_.reset(new HistoryDatabase());
|
|
|
| @@ -609,8 +662,8 @@
|
| delete mem_backend; // Error case, run without the in-memory DB.
|
| db_->BeginExclusiveMode(); // Must be after the mem backend read the data.
|
|
|
| - // Create the history publisher which needs to be passed on to the thumbnail
|
| - // database for publishing history.
|
| + // Create the history publisher which needs to be passed on to the text and
|
| + // thumbnail databases for publishing history.
|
| history_publisher_.reset(new HistoryPublisher());
|
| if (!history_publisher_->Init()) {
|
| // The init may fail when there are no indexers wanting our history.
|
| @@ -618,6 +671,22 @@
|
| history_publisher_.reset();
|
| }
|
|
|
| + // Full-text database. This has to be first so we can pass it to the
|
| + // HistoryDatabase for migration.
|
| + text_database_.reset(new TextDatabaseManager(history_dir_,
|
| + db_.get(), db_.get()));
|
| + if (!text_database_->Init(history_publisher_.get())) {
|
| + LOG(WARNING) << "Text database initialization failed, running without it.";
|
| + text_database_.reset();
|
| + }
|
| + if (db_->needs_version_17_migration()) {
|
| + // See needs_version_17_migration() decl for more. In this case, we want
|
| + // to erase all the text database files. This must be done after the text
|
| + // database manager has been initialized, since it knows about all the
|
| + // files it manages.
|
| + text_database_->DeleteAll();
|
| + }
|
| +
|
| // Thumbnail database.
|
| thumbnail_db_.reset(new ThumbnailDatabase());
|
| if (!db_->GetNeedsThumbnailMigration()) {
|
| @@ -670,7 +739,7 @@
|
| // The main DB initialization should intuitively be first (not that it
|
| // actually matters) and the expirer should be set last.
|
| expirer_.SetDatabases(db_.get(), archived_db_.get(),
|
| - thumbnail_db_.get());
|
| + thumbnail_db_.get(), text_database_.get());
|
|
|
| // Open the long-running transaction.
|
| db_->BeginTransaction();
|
| @@ -678,6 +747,8 @@
|
| thumbnail_db_->BeginTransaction();
|
| if (archived_db_)
|
| archived_db_->BeginTransaction();
|
| + if (text_database_)
|
| + text_database_->BeginTransaction();
|
|
|
| // Get the first item in our database.
|
| db_->GetStartDate(&first_recorded_time_);
|
| @@ -723,6 +794,10 @@
|
| archived_db_->CommitTransaction();
|
| archived_db_.reset();
|
| }
|
| + if (text_database_) {
|
| + text_database_->CommitTransaction();
|
| + text_database_.reset();
|
| + }
|
| }
|
|
|
| std::pair<URLID, VisitID> HistoryBackend::AddPageVisit(
|
| @@ -786,6 +861,14 @@
|
| return std::make_pair(0, 0);
|
| }
|
| url_info.id_ = url_id;
|
| +
|
| + // We don't actually add the URL to the full text index at this point. It
|
| + // might be nice to do this so that even if we get no title or body, the
|
| + // user can search for URL components and get the page.
|
| + //
|
| + // However, in most cases, we'll get at least a title and usually contents,
|
| + // and this add will be redundant, slowing everything down. As a result,
|
| + // we ignore this edge case.
|
| }
|
|
|
| // Add the visit with the time to the database.
|
| @@ -855,6 +938,26 @@
|
| }
|
| }
|
|
|
| + // Add the page to the full text index. This function is also used for
|
| + // importing. Even though we don't have page contents, we can at least
|
| + // add the title and URL to the index so they can be searched. We don't
|
| + // bother to delete any already-existing FTS entries for the URL, since
|
| + // this is normally called on import.
|
| + //
|
| + // If you ever import *after* first run (selecting import from the menu),
|
| + // then these additional entries will "shadow" the originals when querying
|
| + // for the most recent match only, and the user won't get snippets. This is
|
| + // a very minor issue, and fixing it will make import slower, so we don't
|
| + // bother.
|
| + bool has_indexed = false;
|
| + if (text_database_) {
|
| + // We do not have to make it update the visit database; below, we will
|
| + // create the visit entry with the indexed flag set.
|
| + has_indexed = text_database_->AddPageData(i->url(), url_id, 0,
|
| + i->last_visit(),
|
| + i->title(), string16());
|
| + }
|
| +
|
| // Sync code manages the visits itself.
|
| if (visit_source != SOURCE_SYNCED) {
|
| // Make up a visit to correspond to the last visit to the page.
|
| @@ -863,6 +966,7 @@
|
| content::PAGE_TRANSITION_LINK |
|
| content::PAGE_TRANSITION_CHAIN_START |
|
| content::PAGE_TRANSITION_CHAIN_END), 0);
|
| + visit_info.is_indexed = has_indexed;
|
| if (!visit_database->AddVisit(&visit_info, visit_source)) {
|
| NOTREACHED() << "Adding visit failed.";
|
| return;
|
| @@ -897,6 +1001,10 @@
|
| if (!db_)
|
| return;
|
|
|
| + // Update the full text index.
|
| + if (text_database_)
|
| + text_database_->AddPageTitle(url, title);
|
| +
|
| // Search for recent redirects which should get the same title. We make a
|
| // dummy list containing the exact URL visited if there are no redirects so
|
| // the processing below can be the same.
|
| @@ -1391,6 +1499,59 @@
|
| result->set_reached_beginning(true);
|
| }
|
|
|
| +void HistoryBackend::QueryHistoryFTS(const string16& text_query,
|
| + const QueryOptions& options,
|
| + QueryResults* result) {
|
| + if (!text_database_)
|
| + return;
|
| +
|
| + // Full-text query: first get all the FTS results in the time range.
|
| + std::vector<TextDatabase::Match> fts_matches;
|
| + Time first_time_searched;
|
| + text_database_->GetTextMatches(text_query, options,
|
| + &fts_matches, &first_time_searched);
|
| +
|
| + URLQuerier querier(db_.get(), archived_db_.get(), true);
|
| +
|
| + // Now get the row and visit information for each one.
|
| + URLResult url_result; // Declare outside loop to prevent re-construction.
|
| + for (size_t i = 0; i < fts_matches.size(); i++) {
|
| + if (options.max_count != 0 &&
|
| + static_cast<int>(result->size()) >= options.max_count)
|
| + break; // Got too many items.
|
| +
|
| + // Get the URL, querying the main and archived databases as necessary. If
|
| + // it is not found, the history and full-text search databases are out of
|
| + // sync and we skip this result.
|
| + if (!querier.GetRowForURL(fts_matches[i].url, &url_result))
|
| + continue;
|
| +
|
| + if (!url_result.url().is_valid())
|
| + continue; // Don't report invalid URLs in case of corruption.
|
| +
|
| + // Copy over the FTS stuff that the URLDatabase doesn't know about.
|
| + // We do this with swap() to avoid copying, since we know we don't
|
| + // need the original any more. Note that we override the title with the
|
| + // one from FTS, since that will match the title_match_positions (the
|
| + // FTS title and the history DB title may differ).
|
| + url_result.set_title(fts_matches[i].title);
|
| + url_result.title_match_positions_.swap(
|
| + fts_matches[i].title_match_positions);
|
| + url_result.snippet_.Swap(&fts_matches[i].snippet);
|
| +
|
| + // The visit time also comes from the full text search database. Since it
|
| + // has the time, we can avoid an extra query of the visits table.
|
| + url_result.set_visit_time(fts_matches[i].time);
|
| +
|
| + // Add it to the vector; this will clear our |url_result| object as a
|
| + // result of the swap.
|
| + result->AppendURLBySwapping(&url_result);
|
| + }
|
| +
|
| + if (first_time_searched <= first_recorded_time_)
|
| + result->set_reached_beginning(true);
|
| +}
|
| +
|
| // Frontend to GetMostRecentRedirectsFrom from the history thread.
|
| void HistoryBackend::QueryRedirectsFrom(
|
| scoped_refptr<QueryRedirectsRequest> request,
|
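The reads and swaps in QueryHistoryFTS() above pin down the shape of TextDatabase::Match fairly well. A sketch inferred purely from that usage; the authoritative declaration is in text_database.h and may differ:

    // Every member below is read or swapped by QueryHistoryFTS().
    struct Match {
      GURL url;        // Page that matched; resolved via URLQuerier.
      string16 title;  // Title as stored in the FTS index. It overrides the
                       // history DB title so title_match_positions line up.
      base::Time time; // Visit time from the index; saves a visits query.
      Snippet::MatchPositions title_match_positions;
      Snippet snippet; // Highlighted excerpt of the page body.
    };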
| @@ -1650,6 +1811,14 @@
|
| provider->ExecuteWithDB(this, db_.get(), params);
|
| }
|
|
|
| +void HistoryBackend::SetPageContents(const GURL& url,
|
| + const string16& contents) {
|
| + // This is histogrammed in the text database manager.
|
| + if (!text_database_)
|
| + return;
|
| + text_database_->AddPageContents(url, contents);
|
| +}
|
| +
|
| void HistoryBackend::SetPageThumbnail(
|
| const GURL& url,
|
| const gfx::Image* thumbnail,
|
| @@ -1733,23 +1902,6 @@
|
| }
|
| }
|
|
|
| -void HistoryBackend::DeleteFTSIndexDatabases() {
|
| - // Find files on disk matching the text databases file pattern so we can
|
| - // quickly test for and delete them.
|
| - base::FilePath::StringType filepattern =
|
| - FILE_PATH_LITERAL("History Index *");
|
| - base::FileEnumerator enumerator(
|
| - history_dir_, false, base::FileEnumerator::FILES, filepattern);
|
| - int num_databases_deleted = 0;
|
| - base::FilePath current_file;
|
| - while (!(current_file = enumerator.Next()).empty()) {
|
| - if (sql::Connection::Delete(current_file))
|
| - num_databases_deleted++;
|
| - }
|
| - UMA_HISTOGRAM_COUNTS("History.DeleteFTSIndexDatabases",
|
| - num_databases_deleted);
|
| -}
|
| -
|
| bool HistoryBackend::GetThumbnailFromOlderRedirect(
|
| const GURL& page_url,
|
| std::vector<unsigned char>* data) {
|
| @@ -2516,6 +2668,11 @@
|
| archived_db_->CommitTransaction();
|
| archived_db_->BeginTransaction();
|
| }
|
| +
|
| + if (text_database_) {
|
| + text_database_->CommitTransaction();
|
| + text_database_->BeginTransaction();
|
| + }
|
| }
|
|
|
| void HistoryBackend::ScheduleCommit() {
|
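The new text-database branch follows the same commit-and-reopen idiom as the other databases: each one keeps a long-running transaction open at all times, so a flush is a commit immediately followed by a new begin. A generic sketch of the pattern, assuming only the two method names used above:

    // Flush pending work without ever leaving the database outside its
    // long-running transaction.
    template <typename DB>
    void FlushLongRunningTransaction(DB* db) {
      if (!db)
        return;  // The database may have failed to initialize.
      db->CommitTransaction();
      db->BeginTransaction();
    }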
| @@ -2746,7 +2903,7 @@
|
|
|
| // The expirer keeps tabs on the active databases. Tell it about the
|
| // databases which will be closed.
|
| - expirer_.SetDatabases(NULL, NULL, NULL);
|
| + expirer_.SetDatabases(NULL, NULL, NULL, NULL);
|
|
|
| // Reopen a new transaction for |db_| for the sake of CloseAllDatabases().
|
| db_->BeginTransaction();
|
| @@ -2836,7 +2993,15 @@
|
| LOG(ERROR) << "Main history could not be cleared";
|
| kept_urls.clear();
|
|
|
| - // Delete archived history.
|
| + // Delete FTS files & archived history.
|
| + if (text_database_) {
|
| + // We assume that the text database has one transaction open on it that we
|
| + // need to commit & restart (the long-running history transaction).
|
| + text_database_->CommitTransaction();
|
| + text_database_->DeleteAll();
|
| + text_database_->BeginTransaction();
|
| + }
|
| +
|
| if (archived_db_) {
|
| // Close the database and delete the file.
|
| archived_db_.reset();
|
|
|