| Index: chrome/browser/extensions/activity_log/counting_policy.cc | 
| diff --git a/chrome/browser/extensions/activity_log/counting_policy.cc b/chrome/browser/extensions/activity_log/counting_policy.cc | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..40be36d0b9c7fa8b6808b561e0e7ebae7ecf98a7 | 
| --- /dev/null | 
| +++ b/chrome/browser/extensions/activity_log/counting_policy.cc | 
| @@ -0,0 +1,270 @@ | 
| +// Copyright 2013 The Chromium Authors. All rights reserved. | 
| +// Use of this source code is governed by a BSD-style license that can be | 
| +// found in the LICENSE file. | 
| + | 
| +// A policy for storing activity log data to a database that performs | 
| +// aggregation to reduce the size of the database.  The database layout is | 
| +// nearly the same as FullStreamUIPolicy, which stores a complete log, with a | 
| +// few changes: | 
| +//   - a "count" column is added to track how many log records were merged | 
| +//     together into this row | 
| +//   - the "time" column measures the most recent time that the current row was | 
| +//     updated | 
| +// When writing a record, if a row already exists where all other columns | 
| +// (extension_id, action_type, api_name, args, urls, etc.) all match, and the | 
| +// previous time falls within today (the current time), then the count field on | 
| +// the old row is incremented.  Otherwise, a new row is written. | 
| +// | 
| +// For many text columns, repeated strings are compressed by moving string | 
| +// storage to a separate table ("string_ids") and storing only an identifier in | 
| +// the logging table.  For example, if the api_name_x column contained the | 
| +// value 4 and the string_ids table contained a row with primary key 4 and | 
| +// value 'tabs.query', then the api_name field should be taken to have the | 
| +// value 'tabs.query'.  Each column ending with "_x" is compressed in this way. | 
| +// All lookups are to the string_ids table, except for the page_url_x and | 
| +// arg_url_x columns, which are converted via the url_ids table (this | 
| +// separation of URL values is to help simplify history clearing). | 
| +// | 
| +// For strings which are unique (only used once), this scheme will increase | 
| +// storage requirements; each string will appear twice, once in a string table | 
| +// and once in a string table index.  For strings which appear twice storage | 
| +// should be approximately break-even.  For strings which appear more than | 
| +// twice, this compression will likely save space. | 
| +// | 
| +// The activitylog_uncompressed view allows for simpler reading of the activity | 
| +// log contents with identifiers already translated to string values. | 
| + | 
| +// TODO(mvrable): Some additional tasks that need to be done for this policy: | 
| +//  * Clean out old activity log records (say, those older than a couple of | 
| +//    days, perhaps configurable or adaptive based on database size). | 
| +//  * Prune strings from the string tables if they are no longer in use. | 
| +//  * Analyze storage requirements in more detail. | 
| +//  * Perhaps add heuristics for strings likely to be unique, and directly | 
| +//    store those in the activitylog_compressed table? | 
| +//  * Factor out common code that should be shared between policies, and make | 
| +//    sure all sanitization (for example, of URLs) is done uniformly. | 
| + | 
| +#include "chrome/browser/extensions/activity_log/counting_policy.h" | 
| + | 
| +#include <map> | 
| +#include <string> | 
| +#include <vector> | 
| + | 
| +#include "base/json/json_string_value_serializer.h" | 
| +#include "base/strings/string_util.h" | 
| +#include "base/strings/stringprintf.h" | 
| + | 
| +namespace { | 
| + | 
| +// TODO(mvrable): Consider placing this in a common location. | 
| +std::string Serialize(const base::Value* value) { | 
| +  std::string value_as_text; | 
| +  if (!value) { | 
| +    value_as_text = ""; | 
| +  } else { | 
| +    JSONStringValueSerializer serializer(&value_as_text); | 
| +    serializer.SerializeAndOmitBinaryValues(*value); | 
| +  } | 
| +  return value_as_text; | 
| +} | 
| + | 
| +// Given a day (timestamp at local midnight), compute the start of the | 
| +// following day.  To allow for time zone changes, add more than a day then | 
| +// round down. | 
| +base::Time NextDay(const base::Time& day) { | 
| +  return (day + base::TimeDelta::FromHours(36)).LocalMidnight(); | 
| +} | 
| + | 
| +}  // namespace | 
| + | 
| +namespace extensions { | 
| + | 
| +const char* CountingPolicy::kTableName = "activitylog_compressed"; | 
| +const char* CountingPolicy::kTableContentFields[] = { | 
| +    "count", "extension_id_x", "time", "action_type", "api_name_x", "args_x", | 
| +    "page_url_x", "page_title_x", "arg_url_x", "other_x"}; | 
| +const char* CountingPolicy::kTableFieldTypes[] = { | 
| +    "INTEGER NOT NULL DEFAULT 1", "INTEGER NOT NULL", "INTEGER", "INTEGER", | 
| +    "INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER", | 
| +    "INTEGER"}; | 
| + | 
| +static const char kPolicyTableView[] = | 
| +    "DROP VIEW IF EXISTS activitylog_uncompressed;\n" | 
| +    "CREATE VIEW activitylog_uncompressed AS\n" | 
| +    "SELECT count,\n" | 
| +    "    x1.value AS extension_id,\n" | 
| +    "    time,\n" | 
| +    "    action_type,\n" | 
| +    "    x2.value AS api_name,\n" | 
| +    "    x3.value AS args,\n" | 
| +    "    x4.value AS page_url,\n" | 
| +    "    x5.value AS page_title,\n" | 
| +    "    x6.value AS arg_url,\n" | 
| +    "    x7.value AS other\n" | 
| +    "FROM activitylog_compressed\n" | 
| +    "    LEFT JOIN string_ids AS x1 ON (x1.id = extension_id_x)\n" | 
| +    "    LEFT JOIN string_ids AS x2 ON (x2.id = api_name_x)\n" | 
| +    "    LEFT JOIN string_ids AS x3 ON (x3.id = args_x)\n" | 
| +    "    LEFT JOIN url_ids    AS x4 ON (x4.id = page_url_x)\n" | 
| +    "    LEFT JOIN string_ids AS x5 ON (x5.id = page_title_x)\n" | 
| +    "    LEFT JOIN url_ids    AS x6 ON (x6.id = arg_url_x)\n" | 
| +    "    LEFT JOIN string_ids AS x7 ON (x7.id = other_x)\n"; | 
| + | 
| +CountingPolicy::CountingPolicy(Profile* profile) | 
| +    : StreamWithoutArgsUIPolicy(profile) {} | 
| + | 
| +CountingPolicy::~CountingPolicy() {} | 
| + | 
| +bool CountingPolicy::InitDatabase(sql::Connection* db) { | 
| +  // TODO(mvrable): Add logic to drop old database tables. | 
| + | 
| +  if (!string_table_.Initialize(db)) | 
| +    return false; | 
| +  if (!url_table_.Initialize(db)) | 
| +    return false; | 
| + | 
| +  // Create the unified activity log entry table. | 
| +  if (!ActivityDatabase::InitializeTable(db, | 
| +                                         kTableName, | 
| +                                         kTableContentFields, | 
| +                                         kTableFieldTypes, | 
| +                                         arraysize(kTableContentFields))) | 
| +    return false; | 
| + | 
| +  // Create a view for easily accessing the uncompressed form of the data. | 
| +  return db->Execute(kPolicyTableView); | 
| +} | 
| + | 
| +bool CountingPolicy::FlushDatabase(sql::Connection* db) { | 
| +  // Columns that must match exactly for database rows to be coalesced. | 
| +  static const char* matched_columns[] = { | 
| +      "extension_id_x", "action_type", "api_name_x", "args_x", "page_url_x", | 
| +      "page_title_x", "arg_url_x", "other_x"}; | 
| +  LOG(INFO) << "Starting counting policy flush"; | 
| +  Action::ActionVector queue; | 
| +  queue.swap(queued_actions_); | 
| + | 
| +  if (queue.empty()) | 
| +    return true; | 
| + | 
| +  sql::Transaction transaction(db); | 
| +  if (!transaction.Begin()) | 
| +    return false; | 
| + | 
| +  std::string insert_str = | 
| +      "INSERT INTO " + std::string(kTableName) + "(count, time"; | 
| +  std::string update_str = | 
| +      "UPDATE " + std::string(kTableName) + | 
| +      " SET count = count + 1, time = max(?, time)" | 
| +      " WHERE time >= ? AND time < ?"; | 
| + | 
| +  for (size_t i = 0; i < arraysize(matched_columns); i++) { | 
| +    insert_str = | 
| +        base::StringPrintf("%s, %s", insert_str.c_str(), matched_columns[i]); | 
| +    update_str = base::StringPrintf( | 
| +        "%s AND %s = ?", update_str.c_str(), matched_columns[i]); | 
| +  } | 
| +  insert_str += ") VALUES (1, ?"; | 
| +  for (size_t i = 0; i < arraysize(matched_columns); i++) { | 
| +    insert_str += ", ?"; | 
| +  } | 
| +  insert_str += ")"; | 
| + | 
| +  // TODO(mvrable): URL sanitization or summarization. | 
| + | 
| +  Action::ActionVector::size_type i; | 
| +  for (i = 0; i != queue.size(); ++i) { | 
| +    const Action& action = *queue[i]; | 
| + | 
| +    base::Time day_start = action.time().LocalMidnight(); | 
| +    base::Time next_day = NextDay(day_start); | 
| + | 
| +    // The contents in values must match up with fields in matched_columns. | 
| +    int64 id; | 
| +    std::vector<int64> matched_values; | 
| + | 
| +    if (!string_table_.StringToInt(db, action.extension_id(), &id)) | 
| +      return false; | 
| +    matched_values.push_back(id); | 
| + | 
| +    matched_values.push_back(static_cast<int>(action.action_type())); | 
| + | 
| +    if (!string_table_.StringToInt(db, action.api_name(), &id)) | 
| +      return false; | 
| +    matched_values.push_back(id); | 
| + | 
| +    if (!string_table_.StringToInt(db, Serialize(action.args()), &id)) | 
| +      return false; | 
| +    matched_values.push_back(id); | 
| + | 
| +    if (!url_table_.StringToInt(db, action.page_url().spec(), &id)) | 
| +      return false; | 
| +    matched_values.push_back(id); | 
| + | 
| +    // TODO(mvrable): Create a title_table_? | 
| +    if (!string_table_.StringToInt(db, action.page_title(), &id)) | 
| +      return false; | 
| +    matched_values.push_back(id); | 
| + | 
| +    if (!url_table_.StringToInt(db, action.arg_url().spec(), &id)) | 
| +      return false; | 
| +    matched_values.push_back(id); | 
| + | 
| +    if (!string_table_.StringToInt(db, Serialize(action.other()), &id)) | 
| +      return false; | 
| +    matched_values.push_back(id); | 
| + | 
| +    // Assume there is an existing row for this action, and try to update the | 
| +    // count. | 
| +    sql::Statement update_statement(db->GetCachedStatement( | 
| +        sql::StatementID(SQL_FROM_HERE), update_str.c_str())); | 
| +    update_statement.BindInt64(0, action.time().ToInternalValue()); | 
| +    update_statement.BindInt64(1, day_start.ToInternalValue()); | 
| +    update_statement.BindInt64(2, next_day.ToInternalValue()); | 
| +    for (size_t j = 0; j < matched_values.size(); j++) { | 
| +      update_statement.BindInt64(j + 3, matched_values[j]); | 
| +    } | 
| +    if (!update_statement.Run()) | 
| +      return false; | 
| + | 
| +    // Check if the update succeeded (was the count of updated rows non-zero)? | 
| +    // If it failed because no matching row existed, fall back to inserting a | 
| +    // new record. | 
| +    if (db->GetLastChangeCount() > 0) { | 
| +      continue; | 
| +    } | 
| +    sql::Statement insert_statement(db->GetCachedStatement( | 
| +        sql::StatementID(SQL_FROM_HERE), insert_str.c_str())); | 
| +    insert_statement.BindInt64(0, action.time().ToInternalValue()); | 
| +    for (size_t j = 0; j < matched_values.size(); j++) | 
| +      insert_statement.BindInt64(j + 1, matched_values[j]); | 
| +    if (!insert_statement.Run()) | 
| +      return false; | 
| +  } | 
| + | 
| +  LOG(INFO) << "Committing counting policy flush"; | 
| +  if (!transaction.Commit()) | 
| +    return false; | 
| +  LOG(INFO) << "Finished commit"; | 
| +  return true; | 
| +} | 
| + | 
| +#if 0 | 
| +void CountingPolicy::ProcessAction(scoped_refptr<Action> action) { | 
| +  // TODO(mvrable): Right now this argument stripping updates the Action object | 
| +  // in place, which isn't good if there are other users of the object.  When | 
| +  // database writing is moved to policy class, the modifications should be | 
| +  // made locally. | 
| +  action = ProcessArguments(action); | 
| +  ScheduleAndForget(this, &CountingPolicy::QueueAction, action); | 
| +} | 
| + | 
| +void CountingPolicy::QueueAction(scoped_refptr<Action> action) { | 
| +  if (!activity_database()->is_db_valid()) | 
| +    return; | 
| + | 
| +  std::map<scoped_refptr<Action>, int> queued_writes; | 
| +} | 
| +#endif | 
| + | 
| +}  // namespace extensions | 
|  |