Chromium Code Reviews| Index: chrome/browser/extensions/activity_log/counting_policy.cc |
| diff --git a/chrome/browser/extensions/activity_log/counting_policy.cc b/chrome/browser/extensions/activity_log/counting_policy.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..306498312c89c43e876d90fdb9d56e3cfaeb5e23 |
| --- /dev/null |
| +++ b/chrome/browser/extensions/activity_log/counting_policy.cc |
| @@ -0,0 +1,524 @@ |
| +// Copyright 2013 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +// A policy for storing activity log data to a database that performs |
| +// aggregation to reduce the size of the database. The database layout is |
| +// nearly the same as FullStreamUIPolicy, which stores a complete log, with a |
| +// few changes: |
| +// - a "count" column is added to track how many log records were merged |
| +// together into this row |
| +// - the "time" column measures the most recent time that the current row was |
| +// updated |
| +// When writing a record, if a row already exists where all other columns |
| +// (extension_id, action_type, api_name, args, urls, etc.) all match, and the |
| +// previous time falls within today (the current time), then the count field on |
| +// the old row is incremented. Otherwise, a new row is written. |
| +// |
| +// For many text columns, repeated strings are compressed by moving string |
| +// storage to a separate table ("string_ids") and storing only an identifier in |
| +// the logging table. For example, if the api_name_x column contained the |
| +// value 4 and the string_ids table contained a row with primary key 4 and |
| +// value 'tabs.query', then the api_name field should be taken to have the |
| +// value 'tabs.query'. Each column ending with "_x" is compressed in this way. |
| +// All lookups are to the string_ids table, except for the page_url_x and |
| +// arg_url_x columns, which are converted via the url_ids table (this |
| +// separation of URL values is to help simplify history clearing). |
| +// |
| +// The activitylog_uncompressed view allows for simpler reading of the activity |
| +// log contents with identifiers already translated to string values. |
| + |
| +#include "chrome/browser/extensions/activity_log/counting_policy.h" |
| + |
| +#include <map> |
| +#include <string> |
| +#include <vector> |
| + |
| +#include "base/callback.h" |
| +#include "base/files/file_path.h" |
| +#include "base/json/json_reader.h" |
| +#include "base/json/json_string_value_serializer.h" |
| +#include "base/strings/string_util.h" |
| +#include "base/strings/stringprintf.h" |
| +#include "chrome/common/chrome_constants.h" |
| + |
| +using content::BrowserThread; |
| + |
| +namespace { |
| + |
| +// Delay between cleaning passes (to delete old action records) through the |
| +// database. |
| +const int kCleaningDelayInHours = 12; |
| + |
| +// We should log the arguments to these API calls. Be careful when |
| +// constructing this whitelist to not keep arguments that might compromise |
| +// privacy by logging too much data to the activity log. |
| +// |
| +// TODO(mvrable): The contents of this whitelist should be reviewed and |
| +// expanded as needed. |
| +const char* kAlwaysLog[] = {"extension.connect", "extension.sendMessage", |
| + "tabs.executeScript", "tabs.insertCSS"}; |
| + |
| +// Columns in the main database table. See the file-level comment for a |
| +// discussion of how data is stored and the meanings of the _x columns. |
| +const char* kTableContentFields[] = { |
| + "count", "extension_id_x", "time", "action_type", "api_name_x", "args_x", |
| + "page_url_x", "page_title_x", "arg_url_x", "other_x"}; |
| +const char* kTableFieldTypes[] = { |
| + "INTEGER NOT NULL DEFAULT 1", "INTEGER NOT NULL", "INTEGER", "INTEGER", |
| + "INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER", |
| + "INTEGER"}; |
| + |
| +// Miscellaneous SQL commands for initializing the database; these should be |
| +// idempotent. |
| +static const char kPolicyMiscSetup[] = |
| + // The activitylog_uncompressed view performs string lookups for simpler |
| + // access to the log data. |
| + "DROP VIEW IF EXISTS activitylog_uncompressed;\n" |
| + "CREATE VIEW activitylog_uncompressed AS\n" |
| + "SELECT count,\n" |
| + " x1.value AS extension_id,\n" |
| + " time,\n" |
| + " action_type,\n" |
| + " x2.value AS api_name,\n" |
| + " x3.value AS args,\n" |
| + " x4.value AS page_url,\n" |
| + " x5.value AS page_title,\n" |
| + " x6.value AS arg_url,\n" |
| + " x7.value AS other\n" |
| + "FROM activitylog_compressed\n" |
| + " LEFT JOIN string_ids AS x1 ON (x1.id = extension_id_x)\n" |
| + " LEFT JOIN string_ids AS x2 ON (x2.id = api_name_x)\n" |
| + " LEFT JOIN string_ids AS x3 ON (x3.id = args_x)\n" |
| + " LEFT JOIN url_ids AS x4 ON (x4.id = page_url_x)\n" |
| + " LEFT JOIN string_ids AS x5 ON (x5.id = page_title_x)\n" |
| + " LEFT JOIN url_ids AS x6 ON (x6.id = arg_url_x)\n" |
| + " LEFT JOIN string_ids AS x7 ON (x7.id = other_x);\n" |
| + // An index on all fields except count and time: all the fields that aren't |
| + // changed when incrementing a count. This should accelerate finding the |
| + // rows to update (at worst several rows will need to be checked to find |
| + // the one in the right time range). |
| + "CREATE INDEX IF NOT EXISTS activitylog_compressed_index\n" |
| + "ON activitylog_compressed(extension_id_x, action_type, api_name_x,\n" |
| + " args_x, page_url_x, page_title_x, arg_url_x, other_x)"; |
| + |
| +// SQL statements to clean old, unused entries out of the string and URL id |
| +// tables. |
| +static const char kStringTableCleanup[] = |
| + "DELETE FROM string_ids WHERE id NOT IN\n" |
| + "(SELECT extension_id_x FROM activitylog_compressed\n" |
| + " UNION SELECT api_name_x FROM activitylog_compressed\n" |
| + " UNION SELECT args_x FROM activitylog_compressed\n" |
| + " UNION SELECT page_title_x FROM activitylog_compressed\n" |
| + " UNION SELECT other_x FROM activitylog_compressed)"; |
| +static const char kUrlTableCleanup[] = |
| + "DELETE FROM url_ids WHERE id NOT IN\n" |
| + "(SELECT page_url_x FROM activitylog_compressed\n" |
| + " UNION SELECT arg_url_x FROM activitylog_compressed)"; |
| + |
| +} // namespace |
| + |
| +namespace extensions { |
| + |
| +// A specialized Action subclass which is used to represent an action read from |
| +// the database with a corresponding count. |
| +class CountedAction : public Action { |
| + public: |
| + CountedAction(const std::string& extension_id, |
| + const base::Time& time, |
| + const ActionType action_type, |
| + const std::string& api_name) |
| + : Action(extension_id, time, action_type, api_name) {} |
| + |
| + // Number of merged records for this action. |
| + int count() const { return count_; } |
| + void set_count(int count) { count_ = count; } |
| + |
| + virtual std::string PrintForDebug() const OVERRIDE; |
| + |
| + protected: |
| + virtual ~CountedAction() {} |
| + |
| + private: |
| + int count_; |
| +}; |
| + |
| +std::string CountedAction::PrintForDebug() const { |
| + return base::StringPrintf( |
| + "%s COUNT=%d", Action::PrintForDebug().c_str(), count()); |
| +} |
| + |
| +const char* CountingPolicy::kTableName = "activitylog_compressed"; |
| +const char* CountingPolicy::kReadViewName = "activitylog_uncompressed"; |
| + |
| +CountingPolicy::CountingPolicy(Profile* profile) |
| + : ActivityLogDatabasePolicy( |
| + profile, |
| + base::FilePath(chrome::kExtensionActivityLogFilename)), |
| + string_table_("string_ids"), |
| + url_table_("url_ids"), |
| + closing_(false), |
| + retention_time_(base::TimeDelta::FromHours(60)) { |
| + for (size_t i = 0; i < arraysize(kAlwaysLog); i++) { |
| + api_arg_whitelist_.insert(kAlwaysLog[i]); |
| + } |
| +} |
| + |
| +CountingPolicy::~CountingPolicy() {} |
| + |
| +bool CountingPolicy::InitDatabase(sql::Connection* db) { |
| + if (!Util::DropObsoleteTables(db)) |
| + return false; |
| + |
| + if (!string_table_.Initialize(db)) |
| + return false; |
| + if (!url_table_.Initialize(db)) |
| + return false; |
| + |
| + // Create the unified activity log entry table. |
| + if (!ActivityDatabase::InitializeTable(db, |
| + kTableName, |
| + kTableContentFields, |
| + kTableFieldTypes, |
| + arraysize(kTableContentFields))) |
| + return false; |
| + |
| + // Create a view for easily accessing the uncompressed form of the data, and |
| + // any necessary indexes if needed. |
| + return db->Execute(kPolicyMiscSetup); |
| +} |
| + |
| +void CountingPolicy::ProcessAction(scoped_refptr<Action> action) { |
| + ScheduleAndForget(this, &CountingPolicy::QueueAction, action); |
| +} |
| + |
| +void CountingPolicy::QueueAction(scoped_refptr<Action> action) { |
| + if (activity_database()->is_db_valid()) { |
| + action = action->Clone(); |
| + Util::SanitizeAction(action); |
| + Util::StripArguments(api_arg_whitelist_, action); |
| + queued_actions_.push_back(action); |
| + activity_database()->AdviseFlush(queued_actions_.size()); |
| + } |
| +} |
| + |
| +bool CountingPolicy::FlushDatabase(sql::Connection* db) { |
| + // Columns that must match exactly for database rows to be coalesced. |
| + static const char* matched_columns[] = { |
| + "extension_id_x", "action_type", "api_name_x", "args_x", "page_url_x", |
| + "page_title_x", "arg_url_x", "other_x"}; |
| + Action::ActionVector queue; |
| + queue.swap(queued_actions_); |
| + |
| + // Whether to clean old records out of the activity log database. Do this |
| + // much less frequently than database flushes since it is expensive, but |
| + // always check on the first database flush (since there might be a large |
| + // amount of data to clear). |
| + // |
| + // We do not clean old old database records on shutdown, for both performance |
| + // and correctness reasons. We don't want to slow down shutting down the |
| + // browser. Additionally, in unit tests the call to Now() may invoke a mock |
| + // clock which has already been freed by the time the flush on close |
| + // happens--so we can't call Now() when the policy is shutting down. |
| + bool clean_database = false; |
| + if (!closing_) { |
|
felt
2013/08/07 01:09:42
is closing_ touched on both the DB and UI threads?
mvrable
2013/08/07 17:01:19
You're right, it is. Thinking about this some mor
felt
2013/08/08 02:08:50
Thanks, I like the updated version better.
|
| + clean_database = (last_database_cleaning_time_.is_null() || |
| + Now() - last_database_cleaning_time_ > |
| + base::TimeDelta::FromHours(kCleaningDelayInHours)); |
| + } |
| + |
| + if (queue.empty() && !clean_database) |
| + return true; |
| + |
| + sql::Transaction transaction(db); |
| + if (!transaction.Begin()) |
| + return false; |
| + |
| + std::string insert_str = |
| + "INSERT INTO " + std::string(kTableName) + "(count, time"; |
| + std::string update_str = |
| + "UPDATE " + std::string(kTableName) + |
| + " SET count = count + 1, time = max(?, time)" |
| + " WHERE time >= ? AND time < ?"; |
| + |
| + for (size_t i = 0; i < arraysize(matched_columns); i++) { |
| + insert_str = |
| + base::StringPrintf("%s, %s", insert_str.c_str(), matched_columns[i]); |
| + update_str = base::StringPrintf( |
| + "%s AND %s IS ?", update_str.c_str(), matched_columns[i]); |
| + } |
| + insert_str += ") VALUES (1, ?"; |
| + for (size_t i = 0; i < arraysize(matched_columns); i++) { |
| + insert_str += ", ?"; |
| + } |
| + insert_str += ")"; |
| + |
| + Action::ActionVector::size_type i; |
| + for (i = 0; i != queue.size(); ++i) { |
| + const Action& action = *queue[i]; |
| + |
| + base::Time day_start = action.time().LocalMidnight(); |
| + base::Time next_day = Util::AddDays(day_start, 1); |
| + |
| + // The contents in values must match up with fields in matched_columns. A |
| + // value of -1 is used to encode a null database value. |
| + int64 id; |
| + std::vector<int64> matched_values; |
| + |
| + if (!string_table_.StringToInt(db, action.extension_id(), &id)) |
| + return false; |
| + matched_values.push_back(id); |
| + |
| + matched_values.push_back(static_cast<int>(action.action_type())); |
| + |
| + if (!string_table_.StringToInt(db, action.api_name(), &id)) |
| + return false; |
| + matched_values.push_back(id); |
| + |
| + if (action.args()) { |
| + std::string args = Util::Serialize(action.args()); |
| + // TODO(mvrable): For now, truncate long argument lists. This is a |
| + // workaround for excessively-long values coming from DOM logging. When |
| + // the V8ValueConverter is fixed to return more reasonable values, we can |
| + // drop the truncation. |
| + if (args.length() > 10000) { |
| + args = "[\"<too_large>\"]"; |
| + } |
| + if (!string_table_.StringToInt(db, args, &id)) |
| + return false; |
| + matched_values.push_back(id); |
| + } else { |
| + matched_values.push_back(-1); |
| + } |
| + |
| + std::string page_url_string = |
| + Util::UrlToString(action.page_url(), action.page_incognito()); |
| + if (!page_url_string.empty()) { |
| + if (!url_table_.StringToInt(db, page_url_string, &id)) |
| + return false; |
| + matched_values.push_back(id); |
| + } else { |
| + matched_values.push_back(-1); |
| + } |
| + |
| + // TODO(mvrable): Create a title_table_? |
| + if (!action.page_title().empty()) { |
| + if (!string_table_.StringToInt(db, action.page_title(), &id)) |
| + return false; |
| + matched_values.push_back(id); |
| + } else { |
| + matched_values.push_back(-1); |
| + } |
| + |
| + std::string arg_url_string = |
| + Util::UrlToString(action.arg_url(), action.arg_incognito()); |
| + if (!arg_url_string.empty()) { |
| + if (!url_table_.StringToInt(db, arg_url_string, &id)) |
| + return false; |
| + matched_values.push_back(id); |
| + } else { |
| + matched_values.push_back(-1); |
| + } |
| + |
| + if (action.other()) { |
| + if (!string_table_.StringToInt(db, Util::Serialize(action.other()), &id)) |
| + return false; |
| + matched_values.push_back(id); |
| + } else { |
| + matched_values.push_back(-1); |
| + } |
| + |
| + // Assume there is an existing row for this action, and try to update the |
| + // count. |
| + sql::Statement update_statement(db->GetCachedStatement( |
| + sql::StatementID(SQL_FROM_HERE), update_str.c_str())); |
| + update_statement.BindInt64(0, action.time().ToInternalValue()); |
| + update_statement.BindInt64(1, day_start.ToInternalValue()); |
| + update_statement.BindInt64(2, next_day.ToInternalValue()); |
| + for (size_t j = 0; j < matched_values.size(); j++) { |
| + if (matched_values[j] == -1) |
| + update_statement.BindNull(j + 3); |
| + else |
| + update_statement.BindInt64(j + 3, matched_values[j]); |
| + } |
| + if (!update_statement.Run()) |
| + return false; |
| + |
| + // Check if the update succeeded (was the count of updated rows non-zero)? |
| + // If it failed because no matching row existed, fall back to inserting a |
| + // new record. |
| + if (db->GetLastChangeCount() > 0) { |
| + if (db->GetLastChangeCount() > 1) { |
| + LOG(WARNING) << "Found and updated multiple rows in the activity log " |
| + << "database; counts may be off!"; |
| + } |
| + continue; |
| + } |
| + sql::Statement insert_statement(db->GetCachedStatement( |
| + sql::StatementID(SQL_FROM_HERE), insert_str.c_str())); |
| + insert_statement.BindInt64(0, action.time().ToInternalValue()); |
| + for (size_t j = 0; j < matched_values.size(); j++) { |
| + if (matched_values[j] == -1) |
| + update_statement.BindNull(j + 1); |
| + else |
| + insert_statement.BindInt64(j + 1, matched_values[j]); |
| + } |
| + if (!insert_statement.Run()) |
| + return false; |
| + } |
| + |
| + if (clean_database) { |
| + LOG(ERROR) << "Cleaning database"; |
| + base::Time cutoff = (Now() - retention_time()).LocalMidnight(); |
| + if (!CleanOlderThan(db, cutoff)) |
| + return false; |
| + last_database_cleaning_time_ = Now(); |
| + } |
| + |
| + if (!transaction.Commit()) |
| + return false; |
| + |
| + return true; |
| +} |
| + |
| +scoped_ptr<Action::ActionVector> CountingPolicy::DoReadData( |
| + const std::string& extension_id, |
| + const int days_ago) { |
| + // Ensure data is flushed to the database first so that we query over all |
| + // data. |
| + activity_database()->AdviseFlush(ActivityDatabase::kFlushImmediately); |
| + |
| + DCHECK_GE(days_ago, 0); |
| + scoped_ptr<Action::ActionVector> actions(new Action::ActionVector()); |
| + |
| + sql::Connection* db = GetDatabaseConnection(); |
| + if (!db) { |
| + return actions.Pass(); |
| + } |
| + |
| + int64 early_bound; |
| + int64 late_bound; |
| + Util::ComputeDatabaseTimeBounds(Now(), days_ago, &early_bound, &late_bound); |
| + std::string query_str = base::StringPrintf( |
| + "SELECT time, action_type, api_name, args, page_url, page_title, " |
| + "arg_url, other, count " |
| + "FROM %s WHERE extension_id=? AND time>? AND time<=? " |
| + "ORDER BY time DESC", |
| + kReadViewName); |
| + sql::Statement query(db->GetCachedStatement(SQL_FROM_HERE, |
| + query_str.c_str())); |
| + query.BindString(0, extension_id); |
| + query.BindInt64(1, early_bound); |
| + query.BindInt64(2, late_bound); |
| + |
| + while (query.is_valid() && query.Step()) { |
| + scoped_refptr<CountedAction> action = |
| + new CountedAction(extension_id, |
| + base::Time::FromInternalValue(query.ColumnInt64(0)), |
| + static_cast<Action::ActionType>(query.ColumnInt(1)), |
| + query.ColumnString(2)); |
| + |
| + if (query.ColumnType(3) != sql::COLUMN_TYPE_NULL) { |
| + scoped_ptr<Value> parsed_value( |
| + base::JSONReader::Read(query.ColumnString(3))); |
| + if (parsed_value && parsed_value->IsType(Value::TYPE_LIST)) { |
| + action->set_args( |
| + make_scoped_ptr(static_cast<ListValue*>(parsed_value.release()))); |
| + } else { |
| + LOG(WARNING) << "Unable to parse args: '" << query.ColumnString(3) |
| + << "'"; |
| + } |
| + } |
| + |
| + GURL page_url(query.ColumnString(4)); |
| + action->set_page_url(page_url); |
| + |
| + action->set_page_title(query.ColumnString(5)); |
| + |
| + GURL arg_url(query.ColumnString(6)); |
| + action->set_arg_url(arg_url); |
| + |
| + if (query.ColumnType(7) != sql::COLUMN_TYPE_NULL) { |
| + scoped_ptr<Value> parsed_value( |
| + base::JSONReader::Read(query.ColumnString(7))); |
| + if (parsed_value && parsed_value->IsType(Value::TYPE_DICTIONARY)) { |
| + action->set_other(make_scoped_ptr( |
| + static_cast<DictionaryValue*>(parsed_value.release()))); |
| + } else { |
| + LOG(WARNING) << "Unable to parse other: '" << query.ColumnString(7) |
| + << "'"; |
| + } |
| + } |
| + |
| + action->set_count(query.ColumnInt(8)); |
| + |
| + actions->push_back(action); |
| + } |
| + |
| + return actions.Pass(); |
| +} |
| + |
| +void CountingPolicy::ReadData( |
| + const std::string& extension_id, |
| + const int day, |
| + const base::Callback<void(scoped_ptr<Action::ActionVector>)>& callback) { |
| + BrowserThread::PostTaskAndReplyWithResult( |
| + BrowserThread::DB, |
| + FROM_HERE, |
| + base::Bind(&CountingPolicy::DoReadData, |
| + base::Unretained(this), |
| + extension_id, |
| + day), |
| + callback); |
| +} |
| + |
| +void CountingPolicy::OnDatabaseFailure() { |
| + queued_actions_.clear(); |
| +} |
| + |
| +void CountingPolicy::OnDatabaseClose() { |
| + delete this; |
| +} |
| + |
| +// Cleans old records from the activity log database. |
| +bool CountingPolicy::CleanOlderThan(sql::Connection* db, |
| + const base::Time& cutoff) { |
| + std::string clean_statement = |
| + "DELETE FROM " + std::string(kTableName) + " WHERE time < ?"; |
| + sql::Statement cleaner(db->GetCachedStatement(sql::StatementID(SQL_FROM_HERE), |
| + clean_statement.c_str())); |
| + cleaner.BindInt64(0, cutoff.ToInternalValue()); |
| + if (!cleaner.Run()) |
| + return false; |
| + return CleanStringTables(db); |
| +} |
| + |
| +// Cleans unused interned strings from the database. This should be run after |
| +// deleting rows from the main log table to clean out stale values. |
| +bool CountingPolicy::CleanStringTables(sql::Connection* db) { |
| + sql::Statement cleaner1(db->GetCachedStatement( |
| + sql::StatementID(SQL_FROM_HERE), kStringTableCleanup)); |
| + if (!cleaner1.Run()) |
| + return false; |
| + if (db->GetLastChangeCount() > 0) |
| + string_table_.ClearCache(); |
| + |
| + sql::Statement cleaner2(db->GetCachedStatement( |
| + sql::StatementID(SQL_FROM_HERE), kUrlTableCleanup)); |
| + if (!cleaner2.Run()) |
| + return false; |
| + if (db->GetLastChangeCount() > 0) |
| + url_table_.ClearCache(); |
| + |
| + return true; |
| +} |
| + |
| +void CountingPolicy::Close() { |
| + closing_ = true; |
| + |
| + // The policy object should have never been created if there's no DB thread. |
| + DCHECK(BrowserThread::IsMessageLoopValid(BrowserThread::DB)); |
| + ScheduleAndForget(activity_database(), &ActivityDatabase::Close); |
| +} |
| + |
| +} // namespace extensions |