Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(367)

Unified Diff: trunk/src/chrome/browser/history/history_backend.cc

Issue 19637009: Revert 212459 "Remove TextDatabase from the history service." (Closed) Base URL: svn://svn.chromium.org/chrome/
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: trunk/src/chrome/browser/history/history_backend.cc
===================================================================
--- trunk/src/chrome/browser/history/history_backend.cc (revision 212472)
+++ trunk/src/chrome/browser/history/history_backend.cc (working copy)
@@ -14,7 +14,6 @@
#include "base/basictypes.h"
#include "base/bind.h"
#include "base/compiler_specific.h"
-#include "base/files/file_enumerator.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/scoped_vector.h"
#include "base/message_loop/message_loop.h"
@@ -69,6 +68,10 @@
(this does not store visit segments as they expire after 3 mos.)
+ TextDatabaseManager (manages multiple text databases for different times)
+ TextDatabase (represents a single month of full-text index).
+ ...more TextDatabase objects...
+
ExpireHistoryBackend (manages moving things from HistoryDatabase to
the ArchivedDatabase and deleting)
*/
@@ -165,6 +168,53 @@
scoped_refptr<HistoryBackend> history_backend_;
};
+// Handles querying first the main database, then the full text database if that
+// fails. It will optionally keep track of all URLs seen so duplicates can be
+// eliminated. This is used by the querying sub-functions.
+//
+// TODO(brettw): This class may be able to be simplified or eliminated. After
+// this was written, QueryResults can efficiently look up by URL, so the need
+// for this extra set of previously queried URLs is less important.
+class HistoryBackend::URLQuerier {
+ public:
+ URLQuerier(URLDatabase* main_db, URLDatabase* archived_db, bool track_unique)
+ : main_db_(main_db),
+ archived_db_(archived_db),
+ track_unique_(track_unique) {
+ }
+
+ // When we're tracking unique URLs, returns true if this URL has been
+ // previously queried. Only call when tracking unique URLs.
+ bool HasURL(const GURL& url) {
+ DCHECK(track_unique_);
+ return unique_urls_.find(url) != unique_urls_.end();
+ }
+
+ bool GetRowForURL(const GURL& url, URLRow* row) {
+ if (!main_db_->GetRowForURL(url, row)) {
+ if (!archived_db_ || !archived_db_->GetRowForURL(url, row)) {
+ // This row is neither in the main nor the archived DB.
+ return false;
+ }
+ }
+
+ if (track_unique_)
+ unique_urls_.insert(url);
+ return true;
+ }
+
+ private:
+ URLDatabase* main_db_; // Guaranteed non-NULL.
+ URLDatabase* archived_db_; // Possibly NULL.
+
+ bool track_unique_;
+
+ // When track_unique_ is set, this is updated with every URL seen so far.
+ std::set<GURL> unique_urls_;
+
+ DISALLOW_COPY_AND_ASSIGN(URLQuerier);
+};
+
// HistoryBackend --------------------------------------------------------------
HistoryBackend::HistoryBackend(const base::FilePath& history_dir,
@@ -532,7 +582,7 @@
}
// Last, save this redirect chain for later so we can set titles & favicons
- // on the redirected pages properly.
+ // on the redirected pages properly. It is indexed by the destination page.
recent_redirects_.Put(request.url, redirects);
}
@@ -550,6 +600,11 @@
last_ids.second);
}
+ if (text_database_) {
+ text_database_->AddPageURL(request.url, last_ids.first, last_ids.second,
+ request.time);
+ }
+
ScheduleCommit();
}
@@ -562,14 +617,12 @@
TimeTicks beginning_time = TimeTicks::Now();
- // Compute the file names.
+ // Compute the file names. Note that the index file can be removed when the
+ // text db manager is finished being hooked up.
base::FilePath history_name = history_dir_.Append(chrome::kHistoryFilename);
base::FilePath thumbnail_name = GetThumbnailFileName();
base::FilePath archived_name = GetArchivedFileName();
- // Delete the old index database files which are no longer used.
- DeleteFTSIndexDatabases();
-
// History database.
db_.reset(new HistoryDatabase());
@@ -609,8 +662,8 @@
delete mem_backend; // Error case, run without the in-memory DB.
db_->BeginExclusiveMode(); // Must be after the mem backend read the data.
- // Create the history publisher which needs to be passed on to the thumbnail
- // database for publishing history.
+ // Create the history publisher which needs to be passed on to the text and
+ // thumbnail databases for publishing history.
history_publisher_.reset(new HistoryPublisher());
if (!history_publisher_->Init()) {
// The init may fail when there are no indexers wanting our history.
@@ -618,6 +671,22 @@
history_publisher_.reset();
}
+ // Full-text database. This has to be first so we can pass it to the
+ // HistoryDatabase for migration.
+ text_database_.reset(new TextDatabaseManager(history_dir_,
+ db_.get(), db_.get()));
+ if (!text_database_->Init(history_publisher_.get())) {
+ LOG(WARNING) << "Text database initialization failed, running without it.";
+ text_database_.reset();
+ }
+ if (db_->needs_version_17_migration()) {
+ // See needs_version_17_migration() decl for more. In this case, we want
+ // to erase all the text database files. This must be done after the text
+ // database manager has been initialized, since it knows about all the
+ // files it manages.
+ text_database_->DeleteAll();
+ }
+
// Thumbnail database.
thumbnail_db_.reset(new ThumbnailDatabase());
if (!db_->GetNeedsThumbnailMigration()) {
@@ -670,7 +739,7 @@
// The main DB initialization should intuitively be first (not that it
// actually matters) and the expirer should be set last.
expirer_.SetDatabases(db_.get(), archived_db_.get(),
- thumbnail_db_.get());
+ thumbnail_db_.get(), text_database_.get());
// Open the long-running transaction.
db_->BeginTransaction();
@@ -678,6 +747,8 @@
thumbnail_db_->BeginTransaction();
if (archived_db_)
archived_db_->BeginTransaction();
+ if (text_database_)
+ text_database_->BeginTransaction();
// Get the first item in our database.
db_->GetStartDate(&first_recorded_time_);
@@ -723,6 +794,10 @@
archived_db_->CommitTransaction();
archived_db_.reset();
}
+ if (text_database_) {
+ text_database_->CommitTransaction();
+ text_database_.reset();
+ }
}
std::pair<URLID, VisitID> HistoryBackend::AddPageVisit(
@@ -786,6 +861,14 @@
return std::make_pair(0, 0);
}
url_info.id_ = url_id;
+
+ // We don't actually add the URL to the full text index at this point. It
+ // might be nice to do this so that even if we get no title or body, the
+ // user can search for URL components and get the page.
+ //
+ // However, in most cases, we'll get at least a title and usually contents,
+ // and this add will be redundant, slowing everything down. As a result,
+ // we ignore this edge case.
}
// Add the visit with the time to the database.
@@ -855,6 +938,26 @@
}
}
+ // Add the page to the full text index. This function is also used for
+ // importing. Even though we don't have page contents, we can at least
+ // add the title and URL to the index so they can be searched. We don't
+ // bother to delete any already-existing FTS entries for the URL, since
+ // this is normally called on import.
+ //
+ // If you ever import *after* first run (selecting import from the menu),
+ // then these additional entries will "shadow" the originals when querying
+ // for the most recent match only, and the user won't get snippets. This is
+ // a very minor issue, and fixing it will make import slower, so we don't
+ // bother.
+ bool has_indexed = false;
+ if (text_database_) {
+ // We do not have to make it update the visit database; below, we will
+ // create the visit entry with the indexed flag set.
+ has_indexed = text_database_->AddPageData(i->url(), url_id, 0,
+ i->last_visit(),
+ i->title(), string16());
+ }
+
// Sync code manages the visits itself.
if (visit_source != SOURCE_SYNCED) {
// Make up a visit to correspond to the last visit to the page.
@@ -863,6 +966,7 @@
content::PAGE_TRANSITION_LINK |
content::PAGE_TRANSITION_CHAIN_START |
content::PAGE_TRANSITION_CHAIN_END), 0);
+ visit_info.is_indexed = has_indexed;
if (!visit_database->AddVisit(&visit_info, visit_source)) {
NOTREACHED() << "Adding visit failed.";
return;
@@ -897,6 +1001,10 @@
if (!db_)
return;
+ // Update the full text index.
+ if (text_database_)
+ text_database_->AddPageTitle(url, title);
+
// Search for recent redirects which should get the same title. We make a
// dummy list containing the exact URL visited if there are no redirects so
// the processing below can be the same.
@@ -1391,6 +1499,59 @@
result->set_reached_beginning(true);
}
+void HistoryBackend::QueryHistoryFTS(const string16& text_query,
+ const QueryOptions& options,
+ QueryResults* result) {
+ if (!text_database_)
+ return;
+
+ // Full text query, first get all the FTS results in the time range.
+ std::vector<TextDatabase::Match> fts_matches;
+ Time first_time_searched;
+ text_database_->GetTextMatches(text_query, options,
+ &fts_matches, &first_time_searched);
+
+ URLQuerier querier(db_.get(), archived_db_.get(), true);
+
+ // Now get the row and visit information for each one.
+ URLResult url_result; // Declare outside loop to prevent re-construction.
+ for (size_t i = 0; i < fts_matches.size(); i++) {
+ if (options.max_count != 0 &&
+ static_cast<int>(result->size()) >= options.max_count)
+ break; // Got too many items.
+
+ // Get the URL, querying the main and archived databases as necessary. If
+ // this is not found, the history and full text search databases are out
+ // of sync and we give up with this result.
+ if (!querier.GetRowForURL(fts_matches[i].url, &url_result))
+ continue;
+
+ if (!url_result.url().is_valid())
+ continue; // Don't report invalid URLs in case of corruption.
+
+ // Copy over the FTS stuff that the URLDatabase doesn't know about.
+ // We do this with swap() to avoid copying, since we know we don't
+ // need the original any more. Note that we override the title with the
+ // one from FTS, since that will match the title_match_positions (the
+ // FTS title and the history DB title may differ).
+ url_result.set_title(fts_matches[i].title);
+ url_result.title_match_positions_.swap(
+ fts_matches[i].title_match_positions);
+ url_result.snippet_.Swap(&fts_matches[i].snippet);
+
+ // The visit time also comes from the full text search database. Since it
+ // has the time, we can avoid an extra query of the visits table.
+ url_result.set_visit_time(fts_matches[i].time);
+
+ // Add it to the vector; this will clear our |url_result| object as a
+ // result of the swap.
+ result->AppendURLBySwapping(&url_result);
+ }
+
+ if (first_time_searched <= first_recorded_time_)
+ result->set_reached_beginning(true);
+}
+
// Frontend to GetMostRecentRedirectsFrom from the history thread.
void HistoryBackend::QueryRedirectsFrom(
scoped_refptr<QueryRedirectsRequest> request,
@@ -1650,6 +1811,14 @@
provider->ExecuteWithDB(this, db_.get(), params);
}
+void HistoryBackend::SetPageContents(const GURL& url,
+ const string16& contents) {
+ // This is histogrammed in the text database manager.
+ if (!text_database_)
+ return;
+ text_database_->AddPageContents(url, contents);
+}
+
void HistoryBackend::SetPageThumbnail(
const GURL& url,
const gfx::Image* thumbnail,
@@ -1733,23 +1902,6 @@
}
}
-void HistoryBackend::DeleteFTSIndexDatabases() {
- // Find files on disk matching the text databases file pattern so we can
- // quickly test for and delete them.
- base::FilePath::StringType filepattern =
- FILE_PATH_LITERAL("History Index *");
- base::FileEnumerator enumerator(
- history_dir_, false, base::FileEnumerator::FILES, filepattern);
- int num_databases_deleted = 0;
- base::FilePath current_file;
- while (!(current_file = enumerator.Next()).empty()) {
- if (sql::Connection::Delete(current_file))
- num_databases_deleted++;
- }
- UMA_HISTOGRAM_COUNTS("History.DeleteFTSIndexDatabases",
- num_databases_deleted);
-}
-
bool HistoryBackend::GetThumbnailFromOlderRedirect(
const GURL& page_url,
std::vector<unsigned char>* data) {
@@ -2516,6 +2668,11 @@
archived_db_->CommitTransaction();
archived_db_->BeginTransaction();
}
+
+ if (text_database_) {
+ text_database_->CommitTransaction();
+ text_database_->BeginTransaction();
+ }
}
void HistoryBackend::ScheduleCommit() {
@@ -2746,7 +2903,7 @@
// The expirer keeps tabs on the active databases. Tell it about the
// databases which will be closed.
- expirer_.SetDatabases(NULL, NULL, NULL);
+ expirer_.SetDatabases(NULL, NULL, NULL, NULL);
// Reopen a new transaction for |db_| for the sake of CloseAllDatabases().
db_->BeginTransaction();
@@ -2836,7 +2993,15 @@
LOG(ERROR) << "Main history could not be cleared";
kept_urls.clear();
- // Delete archived history.
+ // Delete FTS files & archived history.
+ if (text_database_) {
+ // We assume that the text database has one transaction on it that we need
+ // to close & restart (the long-running history transaction).
+ text_database_->CommitTransaction();
+ text_database_->DeleteAll();
+ text_database_->BeginTransaction();
+ }
+
if (archived_db_) {
// Close the database and delete the file.
archived_db_.reset();
« no previous file with comments | « trunk/src/chrome/browser/history/history_backend.h ('k') | trunk/src/chrome/browser/history/history_backend_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698