| Index: chrome/browser/history/text_database_manager.cc
|
| diff --git a/chrome/browser/history/text_database_manager.cc b/chrome/browser/history/text_database_manager.cc
|
| deleted file mode 100644
|
| index c43a5fcece1e034ae29c7bf64d53ab3899ba3d1f..0000000000000000000000000000000000000000
|
| --- a/chrome/browser/history/text_database_manager.cc
|
| +++ /dev/null
|
| @@ -1,586 +0,0 @@
|
| -// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -
|
| -#include "chrome/browser/history/text_database_manager.h"
|
| -
|
| -#include <algorithm>
|
| -#include <functional>
|
| -
|
| -#include "base/bind.h"
|
| -#include "base/compiler_specific.h"
|
| -#include "base/files/file_enumerator.h"
|
| -#include "base/logging.h"
|
| -#include "base/message_loop/message_loop.h"
|
| -#include "base/metrics/histogram.h"
|
| -#include "base/strings/string_util.h"
|
| -#include "base/strings/utf_string_conversions.h"
|
| -#include "chrome/browser/history/history_publisher.h"
|
| -#include "chrome/browser/history/visit_database.h"
|
| -
|
| -using base::Time;
|
| -using base::TimeDelta;
|
| -using base::TimeTicks;
|
| -
|
| -namespace history {
|
| -
|
| -namespace {
|
| -
|
| -// Size that db_cache_ is shrunk to whenever no transaction is open, i.e. the
|
| -// number of database files we stay attached to at once.
|
| -const int kCacheDBSize = 5;
|
| -
|
| -// Age, in seconds, after which a pending page is committed to the full text
|
| -// index even if its title and/or body never arrived. The same value is used
|
| -// as the delay between flush passes.
|
| -const int kExpirationSeconds = 20;
|
| -
|
| -// Prepares |input| for the indexer: whitespace is collapsed first, then the
|
| -// result is converted from UTF-16 to UTF-8.
|
| -std::string ConvertStringForIndexer(const string16& input) {
|
| -  // TODO(evanm): other transformations here?
|
| -  const string16 collapsed = CollapseWhitespace(input, false);
|
| -  return UTF16ToUTF8(collapsed);
|
| -}
|
| -
|
| -}  // namespace
|
| -
|
| -// TextDatabaseManager::ChangeSet ----------------------------------------------
|
| -
|
| -// Neither construction nor destruction of a ChangeSet does any explicit work.
|
| -TextDatabaseManager::ChangeSet::ChangeSet() {
|
| -}
|
| -
|
| -TextDatabaseManager::ChangeSet::~ChangeSet() {
|
| -}
|
| -
|
| -// TextDatabaseManager::PageInfo -----------------------------------------------
|
| -
|
| -// Captures the identifiers and visit time of a page that is waiting to be
|
| -// indexed, and stamps the entry with the current tick count so that Expired()
|
| -// can later tell how long it has been pending.
|
| -TextDatabaseManager::PageInfo::PageInfo(URLID url_id,
|
| -                                        VisitID visit_id,
|
| -                                        Time visit_time)
|
| -    : url_id_(url_id),
|
| -      visit_id_(visit_id),
|
| -      visit_time_(visit_time) {
|
| -  added_time_ = TimeTicks::Now();
|
| -}
|
| -
|
| -TextDatabaseManager::PageInfo::~PageInfo() {
|
| -}
|
| -
|
| -// Stores |ttl| as the title. An empty title is replaced by a single space so
|
| -// that the stored value is never empty once it has been set.
|
| -void TextDatabaseManager::PageInfo::set_title(const string16& ttl) {
|
| -  if (!ttl.empty()) {
|
| -    title_ = ttl;
|
| -    return;
|
| -  }
|
| -  title_ = ASCIIToUTF16(" ");
|
| -}
|
| -
|
| -// Stores |bdy| as the body. An empty body is replaced by a single space so
|
| -// that the stored value is never empty once it has been set.
|
| -void TextDatabaseManager::PageInfo::set_body(const string16& bdy) {
|
| -  if (!bdy.empty()) {
|
| -    body_ = bdy;
|
| -    return;
|
| -  }
|
| -  body_ = ASCIIToUTF16(" ");
|
| -}
|
| -
|
| -// Returns true when more than kExpirationSeconds have elapsed between the
|
| -// moment this entry was created and |now|.
|
| -bool TextDatabaseManager::PageInfo::Expired(TimeTicks now) const {
|
| -  const base::TimeDelta age = now - added_time_;
|
| -  return age > base::TimeDelta::FromSeconds(kExpirationSeconds);
|
| -}
|
| -
|
| -// TextDatabaseManager ---------------------------------------------------------
|
| -
|
| -// Stores the directory and the URL/visit database pointers. Both caches are
|
| -// created with NO_AUTO_EVICT, no transaction is open, the on-disk database
|
| -// list has not been loaded yet, and no publisher is attached until Init().
|
| -TextDatabaseManager::TextDatabaseManager(const base::FilePath& dir,
|
| -                                         URLDatabase* url_database,
|
| -                                         VisitDatabase* visit_database)
|
| -    : dir_(dir),
|
| -      url_database_(url_database),
|
| -      visit_database_(visit_database),
|
| -      recent_changes_(RecentChangeList::NO_AUTO_EVICT),
|
| -      transaction_nesting_(0),
|
| -      db_cache_(DBCache::NO_AUTO_EVICT),
|
| -      present_databases_loaded_(false),
|
| -      weak_factory_(this),
|
| -      history_publisher_(NULL) {}
|
| -
|
| -// Commits if the manager is destroyed while a transaction is still open.
|
| -// NOTE(review): CommitTransaction() unwinds a single nesting level, so a
|
| -// nesting depth greater than one would not reach the commit loop here -
|
| -// confirm callers never destroy the manager in that state.
|
| -TextDatabaseManager::~TextDatabaseManager() {
|
| -  if (transaction_nesting_ != 0)
|
| -    CommitTransaction();
|
| -}
|
| -
|
| -// static
|
| -// Maps |time| to a database identifier: the UTC year and the 1-based UTC
|
| -// month packed into one number of the form YYYYMM (200801 for January, 2008).
|
| -TextDatabase::DBIdent TextDatabaseManager::TimeToID(Time time) {
|
| -  Time::Exploded parts;
|
| -  time.UTCExplode(&parts);
|
| -
|
| -  const int year_part = parts.year * 100;
|
| -  return year_part + parts.month;
|
| -}
|
| -
|
| -// static
|
| -// Inverse of TimeToID(): unpacks the YYYYMM identifier into a year and month
|
| -// and converts that to a UTC Time. Every other exploded field is zero.
|
| -// NOTE(review): day_of_month is therefore 0 rather than 1 - confirm that
|
| -// Time::FromUTCExploded() handles that the way callers expect.
|
| -Time TextDatabaseManager::IDToTime(TextDatabase::DBIdent id) {
|
| -  Time::Exploded parts;
|
| -  memset(&parts, 0, sizeof(Time::Exploded));
|
| -  parts.month = id % 100;
|
| -  parts.year = id / 100;
|
| -  return Time::FromUTCExploded(parts);
|
| -}
|
| -
|
| -// Remembers |history_publisher| (may be NULL; it is checked before use) and
|
| -// schedules the first pass that commits expired pending changes. Always
|
| -// returns true.
|
| -bool TextDatabaseManager::Init(const HistoryPublisher* history_publisher) {
|
| -  history_publisher_ = history_publisher;
|
| -  ScheduleFlushOldChanges();
|
| -  return true;
|
| -}
|
| -
|
| -// Opens one more nesting level. Individual databases join the transaction
|
| -// lazily, in GetDB(), the first time they are requested for writing.
|
| -void TextDatabaseManager::BeginTransaction() {
|
| -  ++transaction_nesting_;
|
| -}
|
| -
|
| -// Closes one nesting level. Only when the outermost level closes are the
|
| -// databases that joined the transaction committed and the cache trimmed.
|
| -void TextDatabaseManager::CommitTransaction() {
|
| -  DCHECK(transaction_nesting_);
|
| -  --transaction_nesting_;
|
| -  if (transaction_nesting_ != 0)
|
| -    return;  // An enclosing transaction is still open; nothing to commit yet.
|
| -
|
| -  // Commit every database that has an open transaction on it.
|
| -  DBIdentSet::const_iterator ident = open_transactions_.begin();
|
| -  for (; ident != open_transactions_.end(); ++ident) {
|
| -    DBCache::iterator cached = db_cache_.Get(*ident);
|
| -    if (cached != db_cache_.end()) {
|
| -      cached->second->CommitTransaction();
|
| -    } else {
|
| -      NOTREACHED() << "All open transactions should be cached.";
|
| -    }
|
| -  }
|
| -  open_transactions_.clear();
|
| -
|
| -  // GetDB() does not shrink the cache while a transaction is open, so trim
|
| -  // it back down now that the transaction is over.
|
| -  db_cache_.ShrinkToSize(kCacheDBSize);
|
| -}
|
| -
|
| -// Populates present_databases_ with the identifiers of the database files
|
| -// found in dir_. Runs its scan at most once; later calls return immediately.
|
| -void TextDatabaseManager::InitDBList() {
|
| -  if (present_databases_loaded_)
|
| -    return;
|
| -  present_databases_loaded_ = true;
|
| -
|
| -  // Enumerate the files (non-recursively) whose names start with the text
|
| -  // database base name.
|
| -  base::FilePath::StringType pattern(TextDatabase::file_base());
|
| -  pattern.append(FILE_PATH_LITERAL("*"));
|
| -  base::FileEnumerator files(
|
| -      dir_, false, base::FileEnumerator::FILES, pattern);
|
| -
|
| -  for (base::FilePath path = files.Next(); !path.empty();
|
| -       path = files.Next()) {
|
| -    const TextDatabase::DBIdent id = TextDatabase::FileNameToID(path);
|
| -    if (id == 0)
|
| -      continue;  // FileNameToID() yields 0 on error.
|
| -    present_databases_.insert(id);
|
| -  }
|
| -}
|
| -
|
| -// Registers a freshly visited page as pending. Nothing is written to a text
|
| -// database here; the entry is indexed later, either when both title and body
|
| -// have arrived or when it expires.
|
| -void TextDatabaseManager::AddPageURL(const GURL& url,
|
| -                                     URLID url_id,
|
| -                                     VisitID visit_id,
|
| -                                     Time time) {
|
| -  // A pending entry for the same URL is replaced by the new visit.
|
| -  RecentChangeList::iterator existing = recent_changes_.Peek(url);
|
| -  if (existing != recent_changes_.end())
|
| -    recent_changes_.Erase(existing);
|
| -
|
| -  const PageInfo pending(url_id, visit_id, time);
|
| -  recent_changes_.Put(url, pending);
|
| -}
|
| -
|
| -// Supplies the title for |url|. If the page is still pending and its body is
|
| -// already known, the page is indexed immediately; if the body is missing the
|
| -// title is stored on the pending entry. If the page is no longer pending, the
|
| -// title is attached to the most recent visit of the URL instead.
|
| -void TextDatabaseManager::AddPageTitle(const GURL& url,
|
| -                                       const string16& title) {
|
| -  RecentChangeList::iterator pending = recent_changes_.Peek(url);
|
| -  if (pending != recent_changes_.end()) {
|
| -    PageInfo& info = pending->second;
|
| -    if (!info.has_body()) {
|
| -      // Still waiting for the body; keep the title until then.
|
| -      info.set_title(title);
|
| -      return;
|
| -    }
|
| -
|
| -    // This info is complete, write to the database.
|
| -    AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(),
|
| -                title, info.body());
|
| -    recent_changes_.Erase(pending);
|
| -    return;
|
| -  }
|
| -
|
| -  // This page is not in our cache of recent pages. This is very much an edge
|
| -  // case as normally a title will come in <20 seconds after the page commits,
|
| -  // and WebContents will avoid spamming us with >1 title per page. However,
|
| -  // it could come up if your connection is unhappy, and we don't want to
|
| -  // miss anything.
|
| -  //
|
| -  // To handle it, associate the most recent visit with the new title and
|
| -  // index that using the regular code path.
|
| -  URLRow url_row;
|
| -  if (!url_database_->GetRowForURL(url, &url_row))
|
| -    return;  // URL is unknown, give up.
|
| -
|
| -  VisitRow visit;
|
| -  if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit))
|
| -    return;  // No recent visit, give up.
|
| -
|
| -  // If this page was already indexed, a body may have come in first and
|
| -  // indexing again with an empty body would overwrite it. Querying for the
|
| -  // current body, or adding a title-only setter, is not worth it for this
|
| -  // edge case: the title should always arrive before the body, so a title
|
| -  // later than kExpirationSeconds with an earlier body is almost impossible.
|
| -  if (visit.is_indexed)
|
| -    return;
|
| -
|
| -  // Index the late title against that visit, with an empty body.
|
| -  AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time,
|
| -              title, string16());
|
| -}
|
| -
|
| -// Supplies the body text for |url|. If the page is still pending and its
|
| -// title is already known, the page is indexed immediately; if the title is
|
| -// missing the body is stored on the pending entry. If the page is no longer
|
| -// pending, the body is attached to the most recent visit of the URL.
|
| -void TextDatabaseManager::AddPageContents(const GURL& url,
|
| -                                          const string16& body) {
|
| -  RecentChangeList::iterator pending = recent_changes_.Peek(url);
|
| -  if (pending != recent_changes_.end()) {
|
| -    PageInfo& info = pending->second;
|
| -    if (!info.has_title()) {
|
| -      // Still waiting for the title; keep the body until then.
|
| -      info.set_body(body);
|
| -      return;
|
| -    }
|
| -
|
| -    // This info is complete, write to the database.
|
| -    AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(),
|
| -                info.title(), body);
|
| -    recent_changes_.Erase(pending);
|
| -    return;
|
| -  }
|
| -
|
| -  // This page is not in our cache of recent pages. This means that the page
|
| -  // took more than kExpirationSeconds to load. Often, this will be the result
|
| -  // of a very slow iframe or other resource on the page that makes us think
|
| -  // it's still loading.
|
| -  //
|
| -  // As a fallback, set the most recent visit's contents using the input, and
|
| -  // use the last set title in the URL table as the title to index.
|
| -  URLRow url_row;
|
| -  if (!url_database_->GetRowForURL(url, &url_row))
|
| -    return;  // URL is unknown, give up.
|
| -
|
| -  VisitRow visit;
|
| -  if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit))
|
| -    return;  // No recent visit, give up.
|
| -
|
| -  AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time,
|
| -              url_row.title(), body);
|
| -}
|
| -
|
| -// Writes one page into the text database that covers |visit_time|, creating
|
| -// that database if needed. Previously indexed visits of the same URL are
|
| -// un-indexed first. When |visit_id| is nonzero the matching visit row is
|
| -// flagged as indexed. Returns false if the database cannot be opened, the
|
| -// visit row cannot be loaded or updated, or the index write itself fails.
|
| -bool TextDatabaseManager::AddPageData(const GURL& url,
|
| -                                      URLID url_id,
|
| -                                      VisitID visit_id,
|
| -                                      Time visit_time,
|
| -                                      const string16& title,
|
| -                                      const string16& body) {
|
| -  TextDatabase* db = GetDBForTime(visit_time, true);
|
| -  if (!db)
|
| -    return false;
|
| -
|
| -  const TimeTicks beginning_time = TimeTicks::Now();
|
| -
|
| -  // First delete any recently-indexed data for this page. This will delete
|
| -  // anything in the main database, but we don't bother looking through the
|
| -  // archived database.
|
| -  VisitVector visits;
|
| -  visit_database_->GetIndexedVisitsForURL(url_id, &visits);
|
| -  for (VisitVector::iterator old_visit = visits.begin();
|
| -       old_visit != visits.end(); ++old_visit) {
|
| -    old_visit->is_indexed = false;
|
| -    visit_database_->UpdateVisitRow(*old_visit);
|
| -    DeletePageData(old_visit->visit_time, url, NULL);
|
| -  }
|
| -
|
| -  if (visit_id) {
|
| -    // We're supposed to update the visit database, so load the visit.
|
| -    VisitRow row;
|
| -    if (!visit_database_->GetRowForVisit(visit_id, &row)) {
|
| -      // This can happen when the browsing history is deleted while updates
|
| -      // are still in flight; the stale update then finds no visit row.
|
| -      DLOG(WARNING) << "Could not find requested visit #" << visit_id;
|
| -      return false;
|
| -    }
|
| -
|
| -    DCHECK(visit_time == row.visit_time);
|
| -
|
| -    // Update the visit database to reference our addition.
|
| -    row.is_indexed = true;
|
| -    if (!visit_database_->UpdateVisitRow(row))
|
| -      return false;
|
| -  }
|
| -
|
| -  // Now index the data.
|
| -  const std::string url_str = URLDatabase::GURLToDatabaseURL(url);
|
| -  const std::string indexed_title = ConvertStringForIndexer(title);
|
| -  const std::string indexed_body = ConvertStringForIndexer(body);
|
| -  const bool success =
|
| -      db->AddPageData(visit_time, url_str, indexed_title, indexed_body);
|
| -
|
| -  UMA_HISTOGRAM_TIMES("History.AddFTSData",
|
| -                      TimeTicks::Now() - beginning_time);
|
| -
|
| -  // The publisher receives the unconverted title and body, and is notified
|
| -  // whether or not the index write succeeded.
|
| -  if (history_publisher_)
|
| -    history_publisher_->PublishPageContent(visit_time, url, title, body);
|
| -
|
| -  return success;
|
| -}
|
| -
|
| -// Removes the indexed data for |url| at |time| from the text database that
|
| -// covers that time. Does nothing when that database does not exist or cannot
|
| -// be opened. When |change_set| is non-NULL the touched database is recorded
|
| -// in it so that it can be optimized later.
|
| -void TextDatabaseManager::DeletePageData(Time time, const GURL& url,
|
| -                                         ChangeSet* change_set) {
|
| -  const TextDatabase::DBIdent db_ident = TimeToID(time);
|
| -
|
| -  // We want to open the database for writing, but only if it exists. To
|
| -  // achieve this, we check whether it exists by saying we're not going to
|
| -  // write to it (avoiding the autocreation code normally called when writing)
|
| -  // and then access it for writing only if it succeeds.
|
| -  if (!GetDB(db_ident, false))
|
| -    return;
|
| -  TextDatabase* db = GetDB(db_ident, true);
|
| -  if (!db)
|
| -    return;  // Same guard as OptimizeChangedDatabases(): never deref NULL.
|
| -
|
| -  if (change_set)
|
| -    change_set->Add(db_ident);
|
| -
|
| -  db->DeletePageData(time, URLDatabase::GURLToDatabaseURL(url));
|
| -}
|
| -
|
| -// Drops pending (not yet indexed) entries whose visit time lies in
|
| -// [begin, end). A null |end| means no upper bound, and a null |begin|
|
| -// compares less than or equal to every time, so it means no lower bound.
|
| -// When |restrict_urls| is non-empty only entries for those URLs are dropped.
|
| -void TextDatabaseManager::DeleteFromUncommitted(
|
| -    const std::set<GURL>& restrict_urls, Time begin, Time end) {
|
| -  // The list is expected to hold the most recent item first. There won't
|
| -  // normally be many items, so a brute-force walk is fine.
|
| -  RecentChangeList::iterator cur = recent_changes_.begin();
|
| -
|
| -  // Skip the leading entries that are at or after |end|.
|
| -  if (!end.is_null()) {
|
| -    for (; cur != recent_changes_.end(); ++cur) {
|
| -      if (cur->second.visit_time() < end)
|
| -        break;
|
| -    }
|
| -  }
|
| -
|
| -  // Walk on until the first entry older than |begin|, erasing whatever
|
| -  // matches the URL restriction (everything, when there is no restriction).
|
| -  const bool erase_everything = restrict_urls.empty();
|
| -  while (cur != recent_changes_.end() && cur->second.visit_time() >= begin) {
|
| -    if (erase_everything || restrict_urls.count(cur->first) != 0)
|
| -      cur = recent_changes_.Erase(cur);
|
| -    else
|
| -      ++cur;
|
| -  }
|
| -}
|
| -
|
| -// Drops every pending entry whose visit time equals one of |times|.
|
| -// |times| must be in reverse chronological order: each member is earlier
|
| -// than or the same as the one before it.
|
| -void TextDatabaseManager::DeleteFromUncommittedForTimes(
|
| -    const std::vector<base::Time>& times) {
|
| -  // No adjacent pair may be increasing.
|
| -  DCHECK(
|
| -      std::adjacent_find(
|
| -          times.begin(), times.end(), std::less<base::Time>()) ==
|
| -      times.end());
|
| -
|
| -  // Both sequences are treated as newest-first and walked in lockstep.
|
| -  std::vector<base::Time>::const_iterator wanted = times.begin();
|
| -  RecentChangeList::iterator entry = recent_changes_.begin();
|
| -  while (entry != recent_changes_.end() && wanted != times.end()) {
|
| -    const base::Time visit_time = entry->second.visit_time();
|
| -    if (visit_time > *wanted) {
|
| -      ++entry;  // Entry is newer than the next requested time; skip it.
|
| -    } else if (visit_time < *wanted) {
|
| -      ++wanted;  // Nothing pending for this requested time; try the next.
|
| -    } else {
|
| -      entry = recent_changes_.Erase(entry);
|
| -    }
|
| -  }
|
| -}
|
| -
|
| -// Discards all pending entries, empties the database cache and deletes every
|
| -// known database file. Must not be called inside a transaction.
|
| -// NOTE(review): present_databases_ is left populated afterwards - confirm
|
| -// that later lookups of the deleted identifiers are harmless.
|
| -void TextDatabaseManager::DeleteAll() {
|
| -  DCHECK_EQ(0, transaction_nesting_) << "Calling deleteAll in a transaction.";
|
| -
|
| -  // Make sure the list of database files has been loaded before using it.
|
| -  InitDBList();
|
| -
|
| -  recent_changes_.Clear();  // Uncommitted entries.
|
| -  db_cache_.Clear();        // Cached (open) databases.
|
| -
|
| -  // Now go through and delete all the files.
|
| -  DBIdentSet::iterator ident = present_databases_.begin();
|
| -  while (ident != present_databases_.end()) {
|
| -    const base::FilePath file_name =
|
| -        dir_.Append(TextDatabase::IDToFileName(*ident));
|
| -    sql::Connection::Delete(file_name);
|
| -    ++ident;
|
| -  }
|
| -}
|
| -
|
| -// Calls Optimize() on every database recorded in |change_set| that still
|
| -// exists and can be opened for writing.
|
| -void TextDatabaseManager::OptimizeChangedDatabases(
|
| -    const ChangeSet& change_set) {
|
| -  ChangeSet::DBSet::const_iterator changed =
|
| -      change_set.changed_databases_.begin();
|
| -  for (; changed != change_set.changed_databases_.end(); ++changed) {
|
| -    // Probe read-only first: that avoids the autocreation normally done
|
| -    // when opening for writing, so a missing database is never created.
|
| -    if (!GetDB(*changed, false))
|
| -      continue;
|
| -
|
| -    TextDatabase* db = GetDB(*changed, true);
|
| -    if (db)
|
| -      db->Optimize();
|
| -    // Otherwise the file may have changed or something; skip it.
|
| -  }
|
| -}
|
| -
|
| -// Runs |query| against the known text databases, newest first, appending
|
| -// matches to |results| (which is cleared first). |first_time_searched|
|
| -// starts as options.begin_time and is moved to the time of the last result
|
| -// when the search stops early because the result limit was reached or a
|
| -// database reported more results.
|
| -void TextDatabaseManager::GetTextMatches(
|
| -    const string16& query,
|
| -    const QueryOptions& options,
|
| -    std::vector<TextDatabase::Match>* results,
|
| -    Time* first_time_searched) {
|
| -  results->clear();
|
| -  *first_time_searched = options.begin_time;
|
| -
|
| -  InitDBList();
|
| -  if (present_databases_.empty())
|
| -    return;  // Nothing to search.
|
| -
|
| -  // Get the query into the proper format for the individual DBs.
|
| -  string16 fts_query16;
|
| -  query_parser_.ParseQuery(query, &fts_query16);
|
| -  const std::string fts_query = UTF16ToUTF8(fts_query16);
|
| -
|
| -  // Private copy of the options: max_count is adjusted before each
|
| -  // per-database call.
|
| -  QueryOptions cur_options(options);
|
| -
|
| -  // Identifier range that could encompass the input time range. A null bound
|
| -  // falls back to the smallest / largest known identifier.
|
| -  TextDatabase::DBIdent min_ident = *present_databases_.begin();
|
| -  if (!options.begin_time.is_null())
|
| -    min_ident = TimeToID(options.begin_time);
|
| -  TextDatabase::DBIdent max_ident = *present_databases_.rbegin();
|
| -  if (!options.end_time.is_null())
|
| -    max_ident = TimeToID(options.end_time);
|
| -
|
| -  // Iterate over the databases from the most recent backwards.
|
| -  TextDatabase::URLSet found_urls;
|
| -  DBIdentSet::reverse_iterator i = present_databases_.rbegin();
|
| -  for (; i != present_databases_.rend(); ++i) {
|
| -    // TODO(brettw) allow canceling the query in the middle.
|
| -    // if (canceled_or_something)
|
| -    //   break;
|
| -
|
| -    // A plain linear scan for the start of the range. Users will have a few
|
| -    // dozen files at most, so this should not be an issue.
|
| -    if (*i > max_ident)
|
| -      continue;  // Haven't gotten to the time range yet.
|
| -    if (*i < min_ident)
|
| -      break;  // Covered all the time range.
|
| -
|
| -    TextDatabase* cur_db = GetDB(*i, false);
|
| -    if (!cur_db)
|
| -      continue;
|
| -
|
| -    // Adjust the max count according to how many results we've already got.
|
| -    if (options.max_count) {
|
| -      const int already_found = static_cast<int>(results->size());
|
| -      cur_options.max_count = options.max_count - already_found;
|
| -    }
|
| -
|
| -    const bool has_more_results = cur_db->GetTextMatches(
|
| -        fts_query, cur_options, results, &found_urls);
|
| -
|
| -    DCHECK(static_cast<int>(results->size()) <= options.EffectiveMaxCount());
|
| -
|
| -    const bool limit_reached =
|
| -        static_cast<int>(results->size()) == options.EffectiveMaxCount();
|
| -    if (!has_more_results && !limit_reached)
|
| -      continue;
|
| -
|
| -    // Since the search proceeds backwards in time, the last result we have
|
| -    // gives the first time searched.
|
| -    // NOTE(review): assumes |results| is non-empty on this path - confirm
|
| -    // that has_more_results can never be true with no results collected.
|
| -    *first_time_searched = results->back().time;
|
| -    break;
|
| -  }
|
| -}
|
| -
|
| -// Test-only accessor: number of entries currently held in recent_changes_.
|
| -size_t TextDatabaseManager::GetUncommittedEntryCountForTest() const {
|
| -  const size_t pending_count = recent_changes_.size();
|
| -  return pending_count;
|
| -}
|
| -
|
| -// Returns the database with identifier |id|, from the cache when possible
|
| -// and otherwise by constructing and initializing a new TextDatabase.
|
| -// Returns NULL when initialization fails. If a transaction is open and
|
| -// |for_writing| is set, the database joins the transaction the first time
|
| -// it is requested.
|
| -TextDatabase* TextDatabaseManager::GetDB(TextDatabase::DBIdent id,
|
| -                                         bool for_writing) {
|
| -  // True when a database handed out by this call must be part of the
|
| -  // currently open transaction.
|
| -  const bool join_transaction = transaction_nesting_ && for_writing;
|
| -
|
| -  DBCache::iterator cached = db_cache_.Get(id);
|
| -  if (cached != db_cache_.end()) {
|
| -    if (join_transaction && open_transactions_.count(id) == 0) {
|
| -      // Cached, but not yet part of the open transaction.
|
| -      cached->second->BeginTransaction();
|
| -      open_transactions_.insert(id);
|
| -    }
|
| -    return cached->second;
|
| -  }
|
| -
|
| -  // Need to make the database.
|
| -  TextDatabase* new_db = new TextDatabase(dir_, id, for_writing);
|
| -  if (!new_db->Init()) {
|
| -    delete new_db;
|
| -    return NULL;
|
| -  }
|
| -  db_cache_.Put(id, new_db);
|
| -  present_databases_.insert(id);
|
| -
|
| -  if (join_transaction) {
|
| -    new_db->BeginTransaction();
|
| -    open_transactions_.insert(id);
|
| -  }
|
| -
|
| -  // When no transaction is open, allow this new one to kick out an old one.
|
| -  if (transaction_nesting_ == 0)
|
| -    db_cache_.ShrinkToSize(kCacheDBSize);
|
| -
|
| -  return new_db;
|
| -}
|
| -
|
| -// Convenience wrapper: looks up the database whose identifier covers |time|.
|
| -// |create_if_necessary| is forwarded to GetDB() as its |for_writing| flag.
|
| -TextDatabase* TextDatabaseManager::GetDBForTime(Time time,
|
| -                                                bool create_if_necessary) {
|
| -  const TextDatabase::DBIdent id = TimeToID(time);
|
| -  return GetDB(id, create_if_necessary);
|
| -}
|
| -
|
| -// Posts a delayed FlushOldChanges() call to the current message loop, due in
|
| -// kExpirationSeconds. Weak pointers handed out earlier are invalidated
|
| -// first, and the new task is bound to a fresh weak pointer.
|
| -void TextDatabaseManager::ScheduleFlushOldChanges() {
|
| -  weak_factory_.InvalidateWeakPtrs();
|
| -
|
| -  const base::TimeDelta delay =
|
| -      base::TimeDelta::FromSeconds(kExpirationSeconds);
|
| -  base::MessageLoop::current()->PostDelayedTask(
|
| -      FROM_HERE,
|
| -      base::Bind(&TextDatabaseManager::FlushOldChanges,
|
| -                 weak_factory_.GetWeakPtr()),
|
| -      delay);
|
| -}
|
| -
|
| -// Flushes the pending entries that have expired as of the current tick count.
|
| -void TextDatabaseManager::FlushOldChanges() {
|
| -  const TimeTicks now = TimeTicks::Now();
|
| -  FlushOldChangesForTime(now);
|
| -}
|
| -
|
| -// Indexes and removes every pending entry that has expired as of |now|,
|
| -// using whatever title/body it has collected, then schedules the next pass.
|
| -void TextDatabaseManager::FlushOldChangesForTime(TimeTicks now) {
|
| -  // The end of the list is the oldest, so start there and stop at the first
|
| -  // entry that is still too new.
|
| -  RecentChangeList::reverse_iterator oldest = recent_changes_.rbegin();
|
| -  while (oldest != recent_changes_.rend()) {
|
| -    const PageInfo& info = oldest->second;
|
| -    if (!info.Expired(now))
|
| -      break;
|
| -
|
| -    AddPageData(oldest->first, info.url_id(), info.visit_id(),
|
| -                info.visit_time(), info.title(), info.body());
|
| -    oldest = recent_changes_.Erase(oldest);
|
| -  }
|
| -
|
| -  ScheduleFlushOldChanges();
|
| -}
|
| -
|
| -} // namespace history
|
|
|