Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1773)

Unified Diff: chrome/browser/extensions/activity_log/counting_policy.cc

Issue 21646004: Compressed activity log database storage (Closed) Base URL: http://git.chromium.org/chromium/src.git@refactor-cleanups
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/extensions/activity_log/counting_policy.cc
diff --git a/chrome/browser/extensions/activity_log/counting_policy.cc b/chrome/browser/extensions/activity_log/counting_policy.cc
new file mode 100644
index 0000000000000000000000000000000000000000..40be36d0b9c7fa8b6808b561e0e7ebae7ecf98a7
--- /dev/null
+++ b/chrome/browser/extensions/activity_log/counting_policy.cc
@@ -0,0 +1,270 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// A policy for storing activity log data to a database that performs
+// aggregation to reduce the size of the database. The database layout is
+// nearly the same as FullStreamUIPolicy, which stores a complete log, with a
+// few changes:
+// - a "count" column is added to track how many log records were merged
+// together into this row
+// - the "time" column measures the most recent time that the current row was
+// updated
+// When writing a record, if a row already exists where all other columns
+// (extension_id, action_type, api_name, args, urls, etc.) match, and that
+// row's time falls within the same local day as the new record's time, then
+// the count field on the old row is incremented. Otherwise, a new row is
+// written.
+//
+// For many text columns, repeated strings are compressed by moving string
+// storage to a separate table ("string_ids") and storing only an identifier in
+// the logging table. For example, if the api_name_x column contained the
+// value 4 and the string_ids table contained a row with primary key 4 and
+// value 'tabs.query', then the api_name field should be taken to have the
+// value 'tabs.query'. Each column ending with "_x" is compressed in this way.
+// All lookups are to the string_ids table, except for the page_url_x and
+// arg_url_x columns, which are converted via the url_ids table (this
+// separation of URL values is to help simplify history clearing).
+//
+// For strings which are unique (only used once), this scheme will increase
+// storage requirements; each string will appear twice, once in a string table
+// and once in a string table index. For strings which appear twice storage
+// should be approximately break-even. For strings which appear more than
+// twice, this compression will likely save space.
+//
+// The activitylog_uncompressed view allows for simpler reading of the activity
+// log contents with identifiers already translated to string values.
+
+// TODO(mvrable): Some additional tasks that need to be done for this policy:
+// * Clean out old activity log records (say, those older than a couple of
+// days, perhaps configurable or adaptive based on database size).
+// * Prune strings from the string tables if they are no longer in use.
+// * Analyze storage requirements in more detail.
+// * Perhaps add heuristics for strings likely to be unique, and directly
+// store those in the activitylog_compressed table?
+// * Factor out common code that should be shared between policies, and make
+// sure all sanitization (for example, of URLs) is done uniformly.
+
+#include "chrome/browser/extensions/activity_log/counting_policy.h"
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "base/json/json_string_value_serializer.h"
+#include "base/strings/string_util.h"
+#include "base/strings/stringprintf.h"
+
+namespace {
+
+// TODO(mvrable): Consider placing this in a common location.
+//
+// Renders |value| as JSON text, leaving out binary values. A NULL |value|
+// yields the empty string.
+std::string Serialize(const base::Value* value) {
+  std::string serialized;
+  if (value) {
+    // The serializer writes its output straight into |serialized|.
+    JSONStringValueSerializer writer(&serialized);
+    writer.SerializeAndOmitBinaryValues(*value);
+  }
+  return serialized;
+}
+
+// Returns local midnight of the day after |day|, where |day| is itself a
+// timestamp at local midnight. Stepping forward by 36 hours rather than 24
+// and then rounding down to midnight keeps the result correct when a time
+// zone change makes the day longer or shorter than usual.
+base::Time NextDay(const base::Time& day) {
+  const base::Time well_into_next_day = day + base::TimeDelta::FromHours(36);
+  return well_into_next_day.LocalMidnight();
+}
+
+} // namespace
+
+namespace extensions {
+
+// Name of the table that stores the aggregated activity log rows.
+const char* CountingPolicy::kTableName = "activitylog_compressed";
+// Column names of that table. Columns ending in "_x" hold integer
+// identifiers that are resolved through the string_ids / url_ids tables
+// rather than the text itself.
+const char* CountingPolicy::kTableContentFields[] = {
+ "count", "extension_id_x", "time", "action_type", "api_name_x", "args_x",
+ "page_url_x", "page_title_x", "arg_url_x", "other_x"};
+// SQL column types, positionally paired with kTableContentFields. Both arrays
+// are handed to ActivityDatabase::InitializeTable together with
+// arraysize(kTableContentFields), so they must stay the same length.
+const char* CountingPolicy::kTableFieldTypes[] = {
+ "INTEGER NOT NULL DEFAULT 1", "INTEGER NOT NULL", "INTEGER", "INTEGER",
+ "INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER",
+ "INTEGER"};
+
+// SQL script (two statements) that drops and recreates the
+// activitylog_uncompressed view. The view LEFT JOINs every "_x" column of
+// activitylog_compressed against string_ids (or url_ids for page_url_x and
+// arg_url_x) so that readers see the original text values. It is executed as
+// the last step of CountingPolicy::InitDatabase.
+static const char kPolicyTableView[] =
+ "DROP VIEW IF EXISTS activitylog_uncompressed;\n"
+ "CREATE VIEW activitylog_uncompressed AS\n"
+ "SELECT count,\n"
+ " x1.value AS extension_id,\n"
+ " time,\n"
+ " action_type,\n"
+ " x2.value AS api_name,\n"
+ " x3.value AS args,\n"
+ " x4.value AS page_url,\n"
+ " x5.value AS page_title,\n"
+ " x6.value AS arg_url,\n"
+ " x7.value AS other\n"
+ "FROM activitylog_compressed\n"
+ " LEFT JOIN string_ids AS x1 ON (x1.id = extension_id_x)\n"
+ " LEFT JOIN string_ids AS x2 ON (x2.id = api_name_x)\n"
+ " LEFT JOIN string_ids AS x3 ON (x3.id = args_x)\n"
+ " LEFT JOIN url_ids AS x4 ON (x4.id = page_url_x)\n"
+ " LEFT JOIN string_ids AS x5 ON (x5.id = page_title_x)\n"
+ " LEFT JOIN url_ids AS x6 ON (x6.id = arg_url_x)\n"
+ " LEFT JOIN string_ids AS x7 ON (x7.id = other_x)\n";
+
+// Constructs the policy for |profile|. All setup is forwarded to the
+// StreamWithoutArgsUIPolicy base constructor; the body itself does nothing.
+CountingPolicy::CountingPolicy(Profile* profile)
+ : StreamWithoutArgsUIPolicy(profile) {}
+
+// Empty destructor body: nothing is released explicitly here.
+CountingPolicy::~CountingPolicy() {}
+
+// Prepares every database object this policy relies on: the string and URL
+// lookup tables, the aggregated log table, and the view that presents the log
+// with identifiers translated back to text. Returns false as soon as any
+// step fails; later steps are then not attempted.
+bool CountingPolicy::InitDatabase(sql::Connection* db) {
+  // TODO(mvrable): Add logic to drop old database tables.
+
+  // && short-circuits, so the steps run in this order and stop at the first
+  // failure: string table, URL table, then the unified activity log table.
+  const bool tables_ready =
+      string_table_.Initialize(db) && url_table_.Initialize(db) &&
+      ActivityDatabase::InitializeTable(db,
+                                        kTableName,
+                                        kTableContentFields,
+                                        kTableFieldTypes,
+                                        arraysize(kTableContentFields));
+  if (!tables_ready)
+    return false;
+
+  // Finally, (re)create the view over the uncompressed form of the data.
+  return db->Execute(kPolicyTableView);
+}
+
+// Writes all queued actions to the database inside one transaction, merging
+// each action into an existing row when possible.
+//
+// For every action, the text-valued fields are first translated to integer
+// identifiers through string_table_ / url_table_. An UPDATE then tries to
+// bump the count (and refresh the time) of a row whose matched columns are
+// all equal and whose time lies within the same local day as the action. If
+// that UPDATE changed no row, a new row with count 1 is INSERTed instead.
+//
+// Returns true if the queue was empty or the whole batch was committed, and
+// false on the first database error. queued_actions_ is emptied up front, so
+// the actions of a failed flush are no longer queued afterwards.
+// NOTE(review): on an early return the sql::Transaction is presumably rolled
+// back by its destructor, while string_table_ / url_table_ may already have
+// handed out ids inside that transaction -- confirm they cope with a rollback.
+bool CountingPolicy::FlushDatabase(sql::Connection* db) {
+  // Columns that must match exactly for database rows to be coalesced.
+  static const char* const kMatchedColumns[] = {
+      "extension_id_x", "action_type", "api_name_x", "args_x", "page_url_x",
+      "page_title_x", "arg_url_x", "other_x"};
+  // Index of the first bind parameter that belongs to kMatchedColumns. The
+  // UPDATE has three leading time parameters, the INSERT has one.
+  static const int kUpdateFirstMatchedParam = 3;
+  static const int kInsertFirstMatchedParam = 1;
+
+  LOG(INFO) << "Starting counting policy flush";
+  Action::ActionVector queue;
+  queue.swap(queued_actions_);
+
+  if (queue.empty())
+    return true;
+
+  sql::Transaction transaction(db);
+  if (!transaction.Begin())
+    return false;
+
+  // Build both statements by appending in place; the resulting SQL text is
+  // the same on every call, which the cached statements below rely on.
+  std::string insert_str =
+      "INSERT INTO " + std::string(kTableName) + "(count, time";
+  std::string insert_placeholders = ") VALUES (1, ?";
+  std::string update_str =
+      "UPDATE " + std::string(kTableName) +
+      " SET count = count + 1, time = max(?, time)"
+      " WHERE time >= ? AND time < ?";
+  for (size_t i = 0; i < arraysize(kMatchedColumns); ++i) {
+    insert_str += ", ";
+    insert_str += kMatchedColumns[i];
+    insert_placeholders += ", ?";
+    update_str += " AND ";
+    update_str += kMatchedColumns[i];
+    update_str += " = ?";
+  }
+  insert_str += insert_placeholders;
+  insert_str += ")";
+
+  // TODO(mvrable): URL sanitization or summarization.
+
+  for (Action::ActionVector::size_type i = 0; i != queue.size(); ++i) {
+    const Action& action = *queue[i];
+
+    // Rows are only merged within one local day: [day_start, next_day).
+    base::Time day_start = action.time().LocalMidnight();
+    base::Time next_day = NextDay(day_start);
+
+    // The contents of matched_values must line up, in order, with the
+    // entries of kMatchedColumns.
+    std::vector<int64> matched_values;
+    matched_values.reserve(arraysize(kMatchedColumns));
+    int64 id;
+
+    if (!string_table_.StringToInt(db, action.extension_id(), &id))
+      return false;
+    matched_values.push_back(id);
+
+    matched_values.push_back(static_cast<int>(action.action_type()));
+
+    if (!string_table_.StringToInt(db, action.api_name(), &id))
+      return false;
+    matched_values.push_back(id);
+
+    if (!string_table_.StringToInt(db, Serialize(action.args()), &id))
+      return false;
+    matched_values.push_back(id);
+
+    if (!url_table_.StringToInt(db, action.page_url().spec(), &id))
+      return false;
+    matched_values.push_back(id);
+
+    // TODO(mvrable): Create a title_table_?
+    if (!string_table_.StringToInt(db, action.page_title(), &id))
+      return false;
+    matched_values.push_back(id);
+
+    if (!url_table_.StringToInt(db, action.arg_url().spec(), &id))
+      return false;
+    matched_values.push_back(id);
+
+    if (!string_table_.StringToInt(db, Serialize(action.other()), &id))
+      return false;
+    matched_values.push_back(id);
+
+    // Catch a column being added above without updating kMatchedColumns (or
+    // vice versa); a mismatch would bind the wrong number of parameters.
+    DCHECK_EQ(arraysize(kMatchedColumns), matched_values.size());
+
+    // Assume there is an existing row for this action, and try to update the
+    // count.
+    // NOTE(review): the two cached statements must stay on separate source
+    // lines if the cache is keyed by SQL_FROM_HERE location -- confirm.
+    sql::Statement update_statement(db->GetCachedStatement(
+        sql::StatementID(SQL_FROM_HERE), update_str.c_str()));
+    update_statement.BindInt64(0, action.time().ToInternalValue());
+    update_statement.BindInt64(1, day_start.ToInternalValue());
+    update_statement.BindInt64(2, next_day.ToInternalValue());
+    for (size_t j = 0; j < matched_values.size(); ++j) {
+      update_statement.BindInt64(
+          kUpdateFirstMatchedParam + static_cast<int>(j), matched_values[j]);
+    }
+    if (!update_statement.Run())
+      return false;
+
+    // Did the update touch a row (is the count of changed rows non-zero)? If
+    // so this action is done. Otherwise no matching row existed, so fall
+    // back to inserting a new record.
+    if (db->GetLastChangeCount() > 0)
+      continue;
+
+    sql::Statement insert_statement(db->GetCachedStatement(
+        sql::StatementID(SQL_FROM_HERE), insert_str.c_str()));
+    insert_statement.BindInt64(0, action.time().ToInternalValue());
+    for (size_t j = 0; j < matched_values.size(); ++j) {
+      insert_statement.BindInt64(
+          kInsertFirstMatchedParam + static_cast<int>(j), matched_values[j]);
+    }
+    if (!insert_statement.Run())
+      return false;
+  }
+
+  LOG(INFO) << "Committing counting policy flush";
+  if (!transaction.Commit())
+    return false;
+  LOG(INFO) << "Finished commit";
+  return true;
+}
+
+// Everything between #if 0 and #endif is compiled out; it is an unfinished
+// draft of policy-specific action queueing and has no effect on the build.
+#if 0
+// Draft: strips arguments from |action| via ProcessArguments and then
+// schedules QueueAction with the result.
+void CountingPolicy::ProcessAction(scoped_refptr<Action> action) {
+ // TODO(mvrable): Right now this argument stripping updates the Action object
+ // in place, which isn't good if there are other users of the object. When
+ // database writing is moved to policy class, the modifications should be
+ // made locally.
+ action = ProcessArguments(action);
+ ScheduleAndForget(this, &CountingPolicy::QueueAction, action);
+}
+
+// Draft: returns early when the activity database is not valid. The rest is
+// incomplete -- it only declares a local map that is never used.
+void CountingPolicy::QueueAction(scoped_refptr<Action> action) {
+ if (!activity_database()->is_db_valid())
+ return;
+
+ std::map<scoped_refptr<Action>, int> queued_writes;
+}
+#endif
+
+} // namespace extensions

Powered by Google App Engine
This is Rietveld 408576698