| Index: chrome/browser/history/text_database.cc
|
| diff --git a/chrome/browser/history/text_database.cc b/chrome/browser/history/text_database.cc
|
| deleted file mode 100644
|
| index 0f5db891caf6cf0315aeb3e068817cb443bba1ab..0000000000000000000000000000000000000000
|
| --- a/chrome/browser/history/text_database.cc
|
| +++ /dev/null
|
| @@ -1,353 +0,0 @@
|
| -// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -
|
| -#include <limits>
|
| -#include <set>
|
| -#include <string>
|
| -
|
| -#include "chrome/browser/history/text_database.h"
|
| -
|
| -#include "base/file_util.h"
|
| -#include "base/logging.h"
|
| -#include "base/metrics/histogram.h"
|
| -#include "base/strings/string_number_conversions.h"
|
| -#include "base/strings/stringprintf.h"
|
| -#include "base/strings/utf_string_conversions.h"
|
| -#include "sql/statement.h"
|
| -#include "sql/transaction.h"
|
| -
|
| -// There are two tables in each database, one full-text search (FTS) table which
|
| -// indexes the contents and title of the pages. The other is a regular SQLITE
|
| -// table which contains non-indexed information about the page. All columns of
|
| -// a FTS table are indexed using the text search algorithm, which isn't what we
|
| -// want for things like times. If this were in the FTS table, there would be
|
| -// different words in the index for each time number.
|
| -//
|
| -// "pages" FTS table:
|
| -// url URL of the page so searches will match the URL.
|
| -// title Title of the page.
|
| -// body Body of the page.
|
| -//
|
| -// "info" regular table:
|
| -// time Time the corresponding FTS entry was visited.
|
| -//
|
| -// We do joins across these two tables by using their internal rowids, which we
|
| -// keep in sync between the two tables. The internal rowid is the only part of
|
| -// an FTS table that is indexed like a normal table, and the index over it is
|
| -// free since sqlite always indexes the internal rowid.
|
| -
|
| -namespace history {
|
| -
|
| -namespace {
|
| -
|
| -// Version 1 uses FTS2 for index files.
|
| -// Version 2 uses FTS3.
|
| -static const int kCurrentVersionNumber = 2;
|
| -static const int kCompatibleVersionNumber = 2;
|
| -
|
| -// Snippet computation relies on the index of the columns in the original
|
| -// create statement. These are the 0-based indices (as strings) of the
|
| -// corresponding columns.
|
| -const char kTitleColumnIndex[] = "1";
|
| -const char kBodyColumnIndex[] = "2";
|
| -
|
| -// The string prepended to the database identifier to generate the filename.
|
| -const base::FilePath::CharType kFilePrefix[] =
|
| - FILE_PATH_LITERAL("History Index ");
|
| -
|
| -} // namespace
|
| -
|
| -TextDatabase::Match::Match() {}
|
| -
|
| -TextDatabase::Match::~Match() {}
|
| -
|
| -TextDatabase::TextDatabase(const base::FilePath& path,
|
| - DBIdent id,
|
| - bool allow_create)
|
| - : path_(path),
|
| - ident_(id),
|
| - allow_create_(allow_create) {
|
| - // Compute the file name.
|
| - file_name_ = path_.Append(IDToFileName(ident_));
|
| -}
|
| -
|
| -TextDatabase::~TextDatabase() {
|
| -}
|
| -
|
| -// static
|
| -const base::FilePath::CharType* TextDatabase::file_base() {
|
| - return kFilePrefix;
|
| -}
|
| -
|
| -// static
|
| -base::FilePath TextDatabase::IDToFileName(DBIdent id) {
|
| - // Identifiers are intended to be a combination of the year and month, for
|
| - // example, 200801 for January 2008. We convert this to
|
| - // "History Index 2008-01". However, we don't make assumptions about this
|
| - // scheme: the caller should assign IDs as it feels fit with the knowledge
|
| - // that they will apppear on disk in this form.
|
| - base::FilePath::StringType filename(file_base());
|
| - base::StringAppendF(&filename, FILE_PATH_LITERAL("%d-%02d"),
|
| - id / 100, id % 100);
|
| - return base::FilePath(filename);
|
| -}
|
| -
|
| -// static
|
| -TextDatabase::DBIdent TextDatabase::FileNameToID(
|
| - const base::FilePath& file_path) {
|
| - base::FilePath::StringType file_name = file_path.BaseName().value();
|
| -
|
| - // We don't actually check the prefix here. Since the file system could
|
| - // be case insensitive in ways we can't predict (NTFS), checking could
|
| - // potentially be the wrong thing to do. Instead, we just look for a suffix.
|
| - static const size_t kIDStringLength = 7; // Room for "xxxx-xx".
|
| - if (file_name.length() < kIDStringLength)
|
| - return 0;
|
| - const base::FilePath::StringType suffix(
|
| - &file_name[file_name.length() - kIDStringLength]);
|
| -
|
| - if (suffix.length() != kIDStringLength ||
|
| - suffix[4] != FILE_PATH_LITERAL('-')) {
|
| - return 0;
|
| - }
|
| -
|
| - // TODO: Once StringPiece supports a templated interface over the
|
| - // underlying string type, use it here instead of substr, since that
|
| - // will avoid needless string copies. StringPiece cannot be used
|
| - // right now because base::FilePath::StringType could use either 8 or 16 bit
|
| - // characters, depending on the OS.
|
| - int year, month;
|
| - base::StringToInt(suffix.substr(0, 4), &year);
|
| - base::StringToInt(suffix.substr(5, 2), &month);
|
| -
|
| - return year * 100 + month;
|
| -}
|
| -
|
| -bool TextDatabase::Init() {
|
| - // Make sure, if we're not allowed to create the file, that it exists.
|
| - if (!allow_create_) {
|
| - if (!base::PathExists(file_name_))
|
| - return false;
|
| - }
|
| -
|
| - db_.set_histogram_tag("Text");
|
| -
|
| - // Set the database page size to something a little larger to give us
|
| - // better performance (we're typically seek rather than bandwidth limited).
|
| - // This only has an effect before any tables have been created, otherwise
|
| - // this is a NOP. Must be a power of 2 and a max of 8192.
|
| - db_.set_page_size(4096);
|
| -
|
| - // The default cache size is 2000 which give >8MB of data. Since we will often
|
| - // have 2-3 of these objects, each with their own 8MB, this adds up very fast.
|
| - // We therefore reduce the size so when there are multiple objects, we're not
|
| - // too big.
|
| - db_.set_cache_size(512);
|
| -
|
| - // Run the database in exclusive mode. Nobody else should be accessing the
|
| - // database while we're running, and this will give somewhat improved perf.
|
| - db_.set_exclusive_locking();
|
| -
|
| - // Attach the database to our index file.
|
| - if (!db_.Open(file_name_))
|
| - return false;
|
| -
|
| - // Meta table tracking version information.
|
| - if (!meta_table_.Init(&db_, kCurrentVersionNumber, kCompatibleVersionNumber))
|
| - return false;
|
| - if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {
|
| - // This version is too new. We don't bother notifying the user on this
|
| - // error, and just fail to use the file. Normally if they have version skew,
|
| - // they will get it for the main history file and it won't be necessary
|
| - // here. If that's not the case, since this is only indexed data, it's
|
| - // probably better to just not give FTS results than strange errors when
|
| - // everything else is working OK.
|
| - LOG(WARNING) << "Text database is too new.";
|
| - return false;
|
| - }
|
| -
|
| - return CreateTables();
|
| -}
|
| -
|
| -void TextDatabase::BeginTransaction() {
|
| - db_.BeginTransaction();
|
| -}
|
| -
|
| -void TextDatabase::CommitTransaction() {
|
| - db_.CommitTransaction();
|
| -}
|
| -
|
| -bool TextDatabase::CreateTables() {
|
| - // FTS table of page contents.
|
| - if (!db_.DoesTableExist("pages")) {
|
| - if (!db_.Execute("CREATE VIRTUAL TABLE pages USING fts3("
|
| - "TOKENIZE icu,"
|
| - "url LONGVARCHAR,"
|
| - "title LONGVARCHAR,"
|
| - "body LONGVARCHAR)"))
|
| - return false;
|
| - }
|
| -
|
| - // Non-FTS table containing URLs and times so we can efficiently find them
|
| - // using a regular index (all FTS columns are special and are treated as
|
| - // full-text-search, which is not what we want when retrieving this data).
|
| - if (!db_.DoesTableExist("info")) {
|
| - // Note that there is no point in creating an index over time. Since
|
| - // we must always query the entire FTS table (it can not efficiently do
|
| - // subsets), we will always end up doing that first, and joining the info
|
| - // table off of that.
|
| - if (!db_.Execute("CREATE TABLE info(time INTEGER NOT NULL)"))
|
| - return false;
|
| - }
|
| -
|
| - // Create the index.
|
| - return db_.Execute("CREATE INDEX IF NOT EXISTS info_time ON info(time)");
|
| -}
|
| -
|
| -bool TextDatabase::AddPageData(base::Time time,
|
| - const std::string& url,
|
| - const std::string& title,
|
| - const std::string& contents) {
|
| - sql::Transaction committer(&db_);
|
| - if (!committer.Begin())
|
| - return false;
|
| -
|
| - // Add to the pages table.
|
| - sql::Statement add_to_pages(db_.GetCachedStatement(SQL_FROM_HERE,
|
| - "INSERT INTO pages (url, title, body) VALUES (?,?,?)"));
|
| - add_to_pages.BindString(0, url);
|
| - add_to_pages.BindString(1, title);
|
| - add_to_pages.BindString(2, contents);
|
| - if (!add_to_pages.Run())
|
| - return false;
|
| -
|
| - int64 rowid = db_.GetLastInsertRowId();
|
| -
|
| - // Add to the info table with the same rowid.
|
| - sql::Statement add_to_info(db_.GetCachedStatement(SQL_FROM_HERE,
|
| - "INSERT INTO info (rowid, time) VALUES (?,?)"));
|
| - add_to_info.BindInt64(0, rowid);
|
| - add_to_info.BindInt64(1, time.ToInternalValue());
|
| -
|
| - if (!add_to_info.Run())
|
| - return false;
|
| -
|
| - return committer.Commit();
|
| -}
|
| -
|
| -void TextDatabase::DeletePageData(base::Time time, const std::string& url) {
|
| - // First get all rows that match. Selecing on time (which has an index) allows
|
| - // us to avoid brute-force searches on the full-text-index table (there will
|
| - // generally be only one match per time).
|
| - sql::Statement select_ids(db_.GetCachedStatement(SQL_FROM_HERE,
|
| - "SELECT info.rowid "
|
| - "FROM info JOIN pages ON info.rowid = pages.rowid "
|
| - "WHERE info.time=? AND pages.url=?"));
|
| - select_ids.BindInt64(0, time.ToInternalValue());
|
| - select_ids.BindString(1, url);
|
| -
|
| - std::set<int64> rows_to_delete;
|
| - while (select_ids.Step())
|
| - rows_to_delete.insert(select_ids.ColumnInt64(0));
|
| -
|
| - // Delete from the pages table.
|
| - sql::Statement delete_page(db_.GetCachedStatement(SQL_FROM_HERE,
|
| - "DELETE FROM pages WHERE rowid=?"));
|
| -
|
| - for (std::set<int64>::const_iterator i = rows_to_delete.begin();
|
| - i != rows_to_delete.end(); ++i) {
|
| - delete_page.BindInt64(0, *i);
|
| - if (!delete_page.Run())
|
| - return;
|
| - delete_page.Reset(true);
|
| - }
|
| -
|
| - // Delete from the info table.
|
| - sql::Statement delete_info(db_.GetCachedStatement(SQL_FROM_HERE,
|
| - "DELETE FROM info WHERE rowid=?"));
|
| -
|
| - for (std::set<int64>::const_iterator i = rows_to_delete.begin();
|
| - i != rows_to_delete.end(); ++i) {
|
| - delete_info.BindInt64(0, *i);
|
| - if (!delete_info.Run())
|
| - return;
|
| - delete_info.Reset(true);
|
| - }
|
| -}
|
| -
|
| -void TextDatabase::Optimize() {
|
| - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
|
| - "SELECT OPTIMIZE(pages) FROM pages LIMIT 1"));
|
| - statement.Run();
|
| -}
|
| -
|
| -bool TextDatabase::GetTextMatches(const std::string& query,
|
| - const QueryOptions& options,
|
| - std::vector<Match>* results,
|
| - URLSet* found_urls) {
|
| - std::string sql = "SELECT url, title, time, offsets(pages), body FROM pages "
|
| - "LEFT OUTER JOIN info ON pages.rowid = info.rowid WHERE ";
|
| - sql += options.body_only ? "body " : "pages ";
|
| - sql += "MATCH ? AND time >= ? AND time < ? ";
|
| - // Times may not be unique, so also sort by rowid to ensure a stable order.
|
| - sql += "ORDER BY time DESC, info.rowid DESC";
|
| -
|
| - // Generate unique IDs for the two possible variations of the statement,
|
| - // so they don't share the same cached prepared statement.
|
| - sql::StatementID body_only_id = SQL_FROM_HERE;
|
| - sql::StatementID pages_id = SQL_FROM_HERE;
|
| -
|
| - sql::Statement statement(db_.GetCachedStatement(
|
| - (options.body_only ? body_only_id : pages_id), sql.c_str()));
|
| -
|
| - statement.BindString(0, query);
|
| - statement.BindInt64(1, options.EffectiveBeginTime());
|
| - statement.BindInt64(2, options.EffectiveEndTime());
|
| -
|
| - // |results| may not be initially empty, so keep track of how many were added
|
| - // by this call.
|
| - int result_count = 0;
|
| -
|
| - while (statement.Step()) {
|
| - // TODO(brettw) allow canceling the query in the middle.
|
| - // if (canceled_or_something)
|
| - // break;
|
| -
|
| - GURL url(statement.ColumnString(0));
|
| - URLSet::const_iterator found_url = found_urls->find(url);
|
| - if (found_url != found_urls->end())
|
| - continue; // Don't add this duplicate.
|
| -
|
| - if (++result_count > options.EffectiveMaxCount())
|
| - break;
|
| -
|
| - // Fill the results into the vector (avoid copying the URL with Swap()).
|
| - results->resize(results->size() + 1);
|
| - Match& match = results->at(results->size() - 1);
|
| - match.url.Swap(&url);
|
| -
|
| - match.title = statement.ColumnString16(1);
|
| - match.time = base::Time::FromInternalValue(statement.ColumnInt64(2));
|
| -
|
| - // Extract any matches in the title.
|
| - std::string offsets_str = statement.ColumnString(3);
|
| - Snippet::ExtractMatchPositions(offsets_str, kTitleColumnIndex,
|
| - &match.title_match_positions);
|
| - Snippet::ConvertMatchPositionsToWide(statement.ColumnString(1),
|
| - &match.title_match_positions);
|
| -
|
| - // Extract the matches in the body.
|
| - Snippet::MatchPositions match_positions;
|
| - Snippet::ExtractMatchPositions(offsets_str, kBodyColumnIndex,
|
| - &match_positions);
|
| -
|
| - // Compute the snippet based on those matches.
|
| - std::string body = statement.ColumnString(4);
|
| - match.snippet.ComputeSnippet(match_positions, body);
|
| - }
|
| - statement.Reset(true);
|
| - return result_count > options.EffectiveMaxCount();
|
| -}
|
| -
|
| -} // namespace history
|
|
|