| Index: chrome/browser/extensions/activity_log/counting_policy.cc
|
| diff --git a/chrome/browser/extensions/activity_log/counting_policy.cc b/chrome/browser/extensions/activity_log/counting_policy.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..40be36d0b9c7fa8b6808b561e0e7ebae7ecf98a7
|
| --- /dev/null
|
| +++ b/chrome/browser/extensions/activity_log/counting_policy.cc
|
| @@ -0,0 +1,270 @@
|
| +// Copyright 2013 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +// A policy for storing activity log data to a database that performs
|
| +// aggregation to reduce the size of the database. The database layout is
|
| +// nearly the same as FullStreamUIPolicy, which stores a complete log, with a
|
| +// few changes:
|
| +// - a "count" column is added to track how many log records were merged
|
| +// together into this row
|
| +// - the "time" column records the most recent time that the current row was
|
| +// updated
|
| +// When writing a record, if a row already exists in which all other columns
|
| +// (extension_id, action_type, api_name, args, urls, etc.) match, and the
|
| +// stored time falls within the current day, then the count field of the
|
| +// existing row is incremented. Otherwise, a new row is written.
|
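| +// For example, two identical records for the same extension and API call
|
| +// logged on the same calendar day are stored as one row with count == 2.
|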
| +//
|
| +// For many text columns, repeated strings are compressed by moving string
|
| +// storage to a separate table ("string_ids") and storing only an identifier in
|
| +// the logging table. For example, if the api_name_x column contained the
|
| +// value 4 and the string_ids table contained a row with primary key 4 and
|
| +// value 'tabs.query', then the api_name field should be taken to have the
|
| +// value 'tabs.query'. Each column ending with "_x" is compressed in this way.
|
| +// All lookups are to the string_ids table, except for the page_url_x and
|
| +// arg_url_x columns, which are converted via the url_ids table (this
|
| +// separation of URL values is to help simplify history clearing).
|
| +//
|
| +// For strings which are unique (only used once), this scheme will increase
|
| +// storage requirements; each string will appear twice, once in a string table
|
| +// and once in a string table index. For strings which appear twice, storage
|
| +// should be approximately break-even. For strings which appear more than
|
| +// twice, this compression will likely save space.
|
| +//
|
| +// The activitylog_uncompressed view allows for simpler reading of the activity
|
| +// log contents with identifiers already translated to string values.
|
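| +//
|
| +// For reference, the resulting layout is approximately as follows. This is a
|
| +// sketch only: activitylog_compressed is actually created from
|
| +// kTableContentFields and kTableFieldTypes below, and the string_ids/url_ids
|
| +// schema is owned by the string and URL table helpers, so the exact
|
| +// statements may differ.
|
| +//
|
| +//   CREATE TABLE activitylog_compressed(
|
| +//       count INTEGER NOT NULL DEFAULT 1, extension_id_x INTEGER NOT NULL,
|
| +//       time INTEGER, action_type INTEGER, api_name_x INTEGER,
|
| +//       args_x INTEGER, page_url_x INTEGER, page_title_x INTEGER,
|
| +//       arg_url_x INTEGER, other_x INTEGER);
|
| +//   CREATE TABLE string_ids(id INTEGER PRIMARY KEY, value TEXT NOT NULL);
|
| +//   CREATE TABLE url_ids(id INTEGER PRIMARY KEY, value TEXT NOT NULL);
|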
| +
|
| +// TODO(mvrable): Some additional tasks that need to be done for this policy:
|
| +// * Clean out old activity log records (say, those older than a couple of
|
| +// days, perhaps configurable or adaptive based on database size).
|
| +// * Prune strings from the string tables if they are no longer in use.
|
| +// * Analyze storage requirements in more detail.
|
| +// * Perhaps add heuristics for strings likely to be unique, and directly
|
| +// store those in the activitylog_compressed table?
|
| +// * Factor out common code that should be shared between policies, and make
|
| +// sure all sanitization (for example, of URLs) is done uniformly.
|
| +
|
| +#include "chrome/browser/extensions/activity_log/counting_policy.h"
|
| +
|
| +#include <map>
|
| +#include <string>
|
| +#include <vector>
|
| +
|
| +#include "base/json/json_string_value_serializer.h"
|
| +#include "base/strings/string_util.h"
|
| +#include "base/strings/stringprintf.h"
|
| +
|
| +namespace {
|
| +
|
| +// TODO(mvrable): Consider placing this in a common location.
|
| +std::string Serialize(const base::Value* value) {
|
| + std::string value_as_text;
|
| + if (!value) {
|
| + value_as_text = "";
|
| + } else {
|
| + JSONStringValueSerializer serializer(&value_as_text);
|
| + serializer.SerializeAndOmitBinaryValues(*value);
|
| + }
|
| + return value_as_text;
|
| +}
|
| +
|
| +// Given a day (timestamp at local midnight), compute the start of the
|
| +// following day. To allow for time zone changes, add more than a day and
|
| +// then round down to local midnight.
|
| +base::Time NextDay(const base::Time& day) {
|
| + return (day + base::TimeDelta::FromHours(36)).LocalMidnight();
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| +namespace extensions {
|
| +
|
| +const char* CountingPolicy::kTableName = "activitylog_compressed";
|
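| +// Parallel arrays: kTableFieldTypes[i] gives the SQLite type for the column
|
| +// named by kTableContentFields[i]. Columns ending in "_x" hold identifiers
|
| +// that are resolved through the string_ids or url_ids tables.
|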
| +const char* CountingPolicy::kTableContentFields[] = {
|
| + "count", "extension_id_x", "time", "action_type", "api_name_x", "args_x",
|
| + "page_url_x", "page_title_x", "arg_url_x", "other_x"};
|
| +const char* CountingPolicy::kTableFieldTypes[] = {
|
| + "INTEGER NOT NULL DEFAULT 1", "INTEGER NOT NULL", "INTEGER", "INTEGER",
|
| + "INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER",
|
| + "INTEGER"};
|
| +
|
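| +// Recreates the activitylog_uncompressed view, which joins the compressed
|
| +// table against string_ids and url_ids so readers see plain text values.
|
| +// As a usage sketch, the log can then be read with a plain query such as:
|
| +//   SELECT time, extension_id, api_name, count FROM activitylog_uncompressed;
|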
| +static const char kPolicyTableView[] =
|
| + "DROP VIEW IF EXISTS activitylog_uncompressed;\n"
|
| + "CREATE VIEW activitylog_uncompressed AS\n"
|
| + "SELECT count,\n"
|
| + " x1.value AS extension_id,\n"
|
| + " time,\n"
|
| + " action_type,\n"
|
| + " x2.value AS api_name,\n"
|
| + " x3.value AS args,\n"
|
| + " x4.value AS page_url,\n"
|
| + " x5.value AS page_title,\n"
|
| + " x6.value AS arg_url,\n"
|
| + " x7.value AS other\n"
|
| + "FROM activitylog_compressed\n"
|
| + " LEFT JOIN string_ids AS x1 ON (x1.id = extension_id_x)\n"
|
| + " LEFT JOIN string_ids AS x2 ON (x2.id = api_name_x)\n"
|
| + " LEFT JOIN string_ids AS x3 ON (x3.id = args_x)\n"
|
| + " LEFT JOIN url_ids AS x4 ON (x4.id = page_url_x)\n"
|
| + " LEFT JOIN string_ids AS x5 ON (x5.id = page_title_x)\n"
|
| + " LEFT JOIN url_ids AS x6 ON (x6.id = arg_url_x)\n"
|
| + " LEFT JOIN string_ids AS x7 ON (x7.id = other_x)\n";
|
| +
|
| +CountingPolicy::CountingPolicy(Profile* profile)
|
| + : StreamWithoutArgsUIPolicy(profile) {}
|
| +
|
| +CountingPolicy::~CountingPolicy() {}
|
| +
|
| +bool CountingPolicy::InitDatabase(sql::Connection* db) {
|
| + // TODO(mvrable): Add logic to drop old database tables.
|
| +
|
| + if (!string_table_.Initialize(db))
|
| + return false;
|
| + if (!url_table_.Initialize(db))
|
| + return false;
|
| +
|
| + // Create the unified activity log entry table.
|
| + if (!ActivityDatabase::InitializeTable(db,
|
| + kTableName,
|
| + kTableContentFields,
|
| + kTableFieldTypes,
|
| + arraysize(kTableContentFields)))
|
| + return false;
|
| +
|
| + // Create a view for easily accessing the uncompressed form of the data.
|
| + return db->Execute(kPolicyTableView);
|
| +}
|
| +
|
| +bool CountingPolicy::FlushDatabase(sql::Connection* db) {
|
| + // Columns that must match exactly for database rows to be coalesced.
|
| + static const char* matched_columns[] = {
|
| + "extension_id_x", "action_type", "api_name_x", "args_x", "page_url_x",
|
| + "page_title_x", "arg_url_x", "other_x"};
|
| + LOG(INFO) << "Starting counting policy flush";
|
| + Action::ActionVector queue;
|
| + queue.swap(queued_actions_);
|
| +
|
| + if (queue.empty())
|
| + return true;
|
| +
|
| + sql::Transaction transaction(db);
|
| + if (!transaction.Begin())
|
| + return false;
|
| +
|
| + std::string insert_str =
|
| + "INSERT INTO " + std::string(kTableName) + "(count, time";
|
| + std::string update_str =
|
| + "UPDATE " + std::string(kTableName) +
|
| + " SET count = count + 1, time = max(?, time)"
|
| + " WHERE time >= ? AND time < ?";
|
| +
|
| + for (size_t i = 0; i < arraysize(matched_columns); i++) {
|
| + insert_str =
|
| + base::StringPrintf("%s, %s", insert_str.c_str(), matched_columns[i]);
|
| + update_str = base::StringPrintf(
|
| + "%s AND %s = ?", update_str.c_str(), matched_columns[i]);
|
| + }
|
| + insert_str += ") VALUES (1, ?";
|
| + for (size_t i = 0; i < arraysize(matched_columns); i++) {
|
| + insert_str += ", ?";
|
| + }
|
| + insert_str += ")";
|
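| +
|
| +  // For reference, the assembled statements look roughly as follows (a
|
| +  // sketch, with the eight matched columns abbreviated):
|
| +  //   UPDATE activitylog_compressed
|
| +  //     SET count = count + 1, time = max(?, time)
|
| +  //     WHERE time >= ? AND time < ? AND extension_id_x = ? ... other_x = ?
|
| +  //   INSERT INTO activitylog_compressed
|
| +  //     (count, time, extension_id_x, ..., other_x) VALUES (1, ?, ?, ..., ?)
|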
| +
|
| + // TODO(mvrable): URL sanitization or summarization.
|
| +
|
| +  for (Action::ActionVector::size_type i = 0; i != queue.size(); ++i) {
|
| + const Action& action = *queue[i];
|
| +
|
| + base::Time day_start = action.time().LocalMidnight();
|
| + base::Time next_day = NextDay(day_start);
|
| +
|
| + // The contents in values must match up with fields in matched_columns.
|
| + int64 id;
|
| + std::vector<int64> matched_values;
|
| +
|
| + if (!string_table_.StringToInt(db, action.extension_id(), &id))
|
| + return false;
|
| + matched_values.push_back(id);
|
| +
|
| + matched_values.push_back(static_cast<int>(action.action_type()));
|
| +
|
| + if (!string_table_.StringToInt(db, action.api_name(), &id))
|
| + return false;
|
| + matched_values.push_back(id);
|
| +
|
| + if (!string_table_.StringToInt(db, Serialize(action.args()), &id))
|
| + return false;
|
| + matched_values.push_back(id);
|
| +
|
| + if (!url_table_.StringToInt(db, action.page_url().spec(), &id))
|
| + return false;
|
| + matched_values.push_back(id);
|
| +
|
| + // TODO(mvrable): Create a title_table_?
|
| + if (!string_table_.StringToInt(db, action.page_title(), &id))
|
| + return false;
|
| + matched_values.push_back(id);
|
| +
|
| + if (!url_table_.StringToInt(db, action.arg_url().spec(), &id))
|
| + return false;
|
| + matched_values.push_back(id);
|
| +
|
| + if (!string_table_.StringToInt(db, Serialize(action.other()), &id))
|
| + return false;
|
| + matched_values.push_back(id);
|
| +
|
| + // Assume there is an existing row for this action, and try to update the
|
| + // count.
|
| + sql::Statement update_statement(db->GetCachedStatement(
|
| + sql::StatementID(SQL_FROM_HERE), update_str.c_str()));
|
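| +    // Parameter 0 is the (possibly newer) time, parameters 1 and 2 are the
|
| +    // bounds of the day being coalesced into, and parameters 3 onward are the
|
| +    // values for matched_columns, in the same order as that array.
|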
| + update_statement.BindInt64(0, action.time().ToInternalValue());
|
| + update_statement.BindInt64(1, day_start.ToInternalValue());
|
| + update_statement.BindInt64(2, next_day.ToInternalValue());
|
| + for (size_t j = 0; j < matched_values.size(); j++) {
|
| + update_statement.BindInt64(j + 3, matched_values[j]);
|
| + }
|
| + if (!update_statement.Run())
|
| + return false;
|
| +
|
| +    // Check whether the update succeeded, i.e. whether the count of updated
|
| +    // rows was non-zero. If no existing row matched, fall back to inserting a
|
| +    // new record.
|
| + if (db->GetLastChangeCount() > 0) {
|
| + continue;
|
| + }
|
| + sql::Statement insert_statement(db->GetCachedStatement(
|
| + sql::StatementID(SQL_FROM_HERE), insert_str.c_str()));
|
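| +    // Parameter 0 is the time; the remaining parameters are the values for
|
| +    // matched_columns, in the order of the INSERT column list.
|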
| + insert_statement.BindInt64(0, action.time().ToInternalValue());
|
| + for (size_t j = 0; j < matched_values.size(); j++)
|
| + insert_statement.BindInt64(j + 1, matched_values[j]);
|
| + if (!insert_statement.Run())
|
| + return false;
|
| + }
|
| +
|
| + LOG(INFO) << "Committing counting policy flush";
|
| + if (!transaction.Commit())
|
| + return false;
|
| + LOG(INFO) << "Finished commit";
|
| + return true;
|
| +}
|
| +
|
| +#if 0
|
| +void CountingPolicy::ProcessAction(scoped_refptr<Action> action) {
|
| + // TODO(mvrable): Right now this argument stripping updates the Action object
|
| + // in place, which isn't good if there are other users of the object. When
|
| +  // database writing is moved to the policy class, the modifications should be
|
| + // made locally.
|
| + action = ProcessArguments(action);
|
| + ScheduleAndForget(this, &CountingPolicy::QueueAction, action);
|
| +}
|
| +
|
| +void CountingPolicy::QueueAction(scoped_refptr<Action> action) {
|
| + if (!activity_database()->is_db_valid())
|
| + return;
|
| +
|
| + std::map<scoped_refptr<Action>, int> queued_writes;
|
| +}
|
| +#endif
|
| +
|
| +} // namespace extensions
|
|
|