| OLD | NEW |
| (Empty) | |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 // A policy for storing activity log data to a database that performs |
| 6 // aggregation to reduce the size of the database. The database layout is |
| 7 // nearly the same as FullStreamUIPolicy, which stores a complete log, with a |
| 8 // few changes: |
| 9 // - a "count" column is added to track how many log records were merged |
| 10 // together into this row |
| 11 // - the "time" column measures the most recent time that the current row was |
| 12 // updated |
| 13 // When writing a record, if a row already exists where all other columns |
| 14 // (extension_id, action_type, api_name, args, urls, etc.) match, and the |
| 15 // row's time falls within the same local day as the new record, then the |
| 16 // count field on the old row is incremented. Otherwise, a new row is written. |
| 17 // |
| 18 // For many text columns, repeated strings are compressed by moving string |
| 19 // storage to a separate table ("string_ids") and storing only an identifier in |
| 20 // the logging table. For example, if the api_name_x column contained the |
| 21 // value 4 and the string_ids table contained a row with primary key 4 and |
| 22 // value 'tabs.query', then the api_name field should be taken to have the |
| 23 // value 'tabs.query'. Each column ending with "_x" is compressed in this way. |
| 24 // All lookups are to the string_ids table, except for the page_url_x and |
| 25 // arg_url_x columns, which are converted via the url_ids table (this |
| 26 // separation of URL values is to help simplify history clearing). |
| 27 // |
| 28 // For strings which are unique (only used once), this scheme will increase |
| 29 // storage requirements; each string will appear twice, once in a string table |
| 30 // and once in a string table index. For strings which appear twice storage |
| 31 // should be approximately break-even. For strings which appear more than |
| 32 // twice, this compression will likely save space. |
| 33 // |
| 34 // The activitylog_uncompressed view allows for simpler reading of the activity |
| 35 // log contents with identifiers already translated to string values. |
| 36 |
| 37 // TODO(mvrable): Some additional tasks that need to be done for this policy: |
| 38 // * Clean out old activity log records (say, those older than a couple of |
| 39 // days, perhaps configurable or adaptive based on database size). |
| 40 // * Prune strings from the string tables if they are no longer in use. |
| 41 // * Analyze storage requirements in more detail. |
| 42 // * Perhaps add heuristics for strings likely to be unique, and directly |
| 43 // store those in the activitylog_compressed table? |
| 44 // * Factor out common code that should be shared between policies, and make |
| 45 // sure all sanitization (for example, of URLs) is done uniformly. |
| 46 |
| 47 #include "chrome/browser/extensions/activity_log/counting_policy.h" |
| 48 |
| 49 #include <map> |
| 50 #include <string> |
| 51 #include <vector> |
| 52 |
| 53 #include "base/json/json_string_value_serializer.h" |
| 54 #include "base/strings/string_util.h" |
| 55 #include "base/strings/stringprintf.h" |
| 56 |
| 57 namespace { |
| 58 |
| 59 // TODO(mvrable): Consider placing this in a common location. |
| 60 std::string Serialize(const base::Value* value) { |
| 61 std::string value_as_text; |
| 62 if (!value) { |
| 63 value_as_text = ""; |
| 64 } else { |
| 65 JSONStringValueSerializer serializer(&value_as_text); |
| 66 serializer.SerializeAndOmitBinaryValues(*value); |
| 67 } |
| 68 return value_as_text; |
| 69 } |
| 70 |
| 71 // Given a day (timestamp at local midnight), compute the start of the |
| 72 // following day. To allow for time zone changes, add more than a day then |
| 73 // round down. |
| 74 base::Time NextDay(const base::Time& day) { |
| 75 return (day + base::TimeDelta::FromHours(36)).LocalMidnight(); |
| 76 } |
| 77 |
| 78 } // namespace |
| 79 |
| 80 namespace extensions { |
| 81 |
| 82 const char* CountingPolicy::kTableName = "activitylog_compressed"; |
| 83 const char* CountingPolicy::kTableContentFields[] = { |
| 84 "count", "extension_id_x", "time", "action_type", "api_name_x", "args_x", |
| 85 "page_url_x", "page_title_x", "arg_url_x", "other_x"}; |
| 86 const char* CountingPolicy::kTableFieldTypes[] = { |
| 87 "INTEGER NOT NULL DEFAULT 1", "INTEGER NOT NULL", "INTEGER", "INTEGER", |
| 88 "INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER", |
| 89 "INTEGER"}; |
| 90 |
// SQL that (re)creates the activitylog_uncompressed view. The view joins each
// "_x" identifier column of activitylog_compressed against string_ids (or
// url_ids for page_url_x and arg_url_x) so that readers see the stored values
// rather than the identifiers. LEFT JOINs are used, so a row is still returned
// when an identifier has no match in the lookup table.
static const char kPolicyTableView[] =
    // Replace any existing definition of the view.
    "DROP VIEW IF EXISTS activitylog_uncompressed;\n"
    "CREATE VIEW activitylog_uncompressed AS\n"
    // Output columns.
    "SELECT count,\n"
    " x1.value AS extension_id,\n"
    " time,\n"
    " action_type,\n"
    " x2.value AS api_name,\n"
    " x3.value AS args,\n"
    " x4.value AS page_url,\n"
    " x5.value AS page_title,\n"
    " x6.value AS arg_url,\n"
    " x7.value AS other\n"
    // One lookup-table join per compressed column.
    "FROM activitylog_compressed\n"
    " LEFT JOIN string_ids AS x1 ON (x1.id = extension_id_x)\n"
    " LEFT JOIN string_ids AS x2 ON (x2.id = api_name_x)\n"
    " LEFT JOIN string_ids AS x3 ON (x3.id = args_x)\n"
    " LEFT JOIN url_ids AS x4 ON (x4.id = page_url_x)\n"
    " LEFT JOIN string_ids AS x5 ON (x5.id = page_title_x)\n"
    " LEFT JOIN url_ids AS x6 ON (x6.id = arg_url_x)\n"
    " LEFT JOIN string_ids AS x7 ON (x7.id = other_x)\n";
| 112 |
| 113 CountingPolicy::CountingPolicy(Profile* profile) |
| 114 : StreamWithoutArgsUIPolicy(profile) {} |
| 115 |
| 116 CountingPolicy::~CountingPolicy() {} |
| 117 |
| 118 bool CountingPolicy::InitDatabase(sql::Connection* db) { |
| 119 // TODO(mvrable): Add logic to drop old database tables. |
| 120 |
| 121 if (!string_table_.Initialize(db)) |
| 122 return false; |
| 123 if (!url_table_.Initialize(db)) |
| 124 return false; |
| 125 |
| 126 // Create the unified activity log entry table. |
| 127 if (!ActivityDatabase::InitializeTable(db, |
| 128 kTableName, |
| 129 kTableContentFields, |
| 130 kTableFieldTypes, |
| 131 arraysize(kTableContentFields))) |
| 132 return false; |
| 133 |
| 134 // Create a view for easily accessing the uncompressed form of the data. |
| 135 return db->Execute(kPolicyTableView); |
| 136 } |
| 137 |
| 138 bool CountingPolicy::FlushDatabase(sql::Connection* db) { |
| 139 // Columns that must match exactly for database rows to be coalesced. |
| 140 static const char* matched_columns[] = { |
| 141 "extension_id_x", "action_type", "api_name_x", "args_x", "page_url_x", |
| 142 "page_title_x", "arg_url_x", "other_x"}; |
| 143 LOG(INFO) << "Starting counting policy flush"; |
| 144 Action::ActionVector queue; |
| 145 queue.swap(queued_actions_); |
| 146 |
| 147 if (queue.empty()) |
| 148 return true; |
| 149 |
| 150 sql::Transaction transaction(db); |
| 151 if (!transaction.Begin()) |
| 152 return false; |
| 153 |
| 154 std::string insert_str = |
| 155 "INSERT INTO " + std::string(kTableName) + "(count, time"; |
| 156 std::string update_str = |
| 157 "UPDATE " + std::string(kTableName) + |
| 158 " SET count = count + 1, time = max(?, time)" |
| 159 " WHERE time >= ? AND time < ?"; |
| 160 |
| 161 for (size_t i = 0; i < arraysize(matched_columns); i++) { |
| 162 insert_str = |
| 163 base::StringPrintf("%s, %s", insert_str.c_str(), matched_columns[i]); |
| 164 update_str = base::StringPrintf( |
| 165 "%s AND %s = ?", update_str.c_str(), matched_columns[i]); |
| 166 } |
| 167 insert_str += ") VALUES (1, ?"; |
| 168 for (size_t i = 0; i < arraysize(matched_columns); i++) { |
| 169 insert_str += ", ?"; |
| 170 } |
| 171 insert_str += ")"; |
| 172 |
| 173 // TODO(mvrable): URL sanitization or summarization. |
| 174 |
| 175 Action::ActionVector::size_type i; |
| 176 for (i = 0; i != queue.size(); ++i) { |
| 177 const Action& action = *queue[i]; |
| 178 |
| 179 base::Time day_start = action.time().LocalMidnight(); |
| 180 base::Time next_day = NextDay(day_start); |
| 181 |
| 182 // The contents in values must match up with fields in matched_columns. |
| 183 int64 id; |
| 184 std::vector<int64> matched_values; |
| 185 |
| 186 if (!string_table_.StringToInt(db, action.extension_id(), &id)) |
| 187 return false; |
| 188 matched_values.push_back(id); |
| 189 |
| 190 matched_values.push_back(static_cast<int>(action.action_type())); |
| 191 |
| 192 if (!string_table_.StringToInt(db, action.api_name(), &id)) |
| 193 return false; |
| 194 matched_values.push_back(id); |
| 195 |
| 196 if (!string_table_.StringToInt(db, Serialize(action.args()), &id)) |
| 197 return false; |
| 198 matched_values.push_back(id); |
| 199 |
| 200 if (!url_table_.StringToInt(db, action.page_url().spec(), &id)) |
| 201 return false; |
| 202 matched_values.push_back(id); |
| 203 |
| 204 // TODO(mvrable): Create a title_table_? |
| 205 if (!string_table_.StringToInt(db, action.page_title(), &id)) |
| 206 return false; |
| 207 matched_values.push_back(id); |
| 208 |
| 209 if (!url_table_.StringToInt(db, action.arg_url().spec(), &id)) |
| 210 return false; |
| 211 matched_values.push_back(id); |
| 212 |
| 213 if (!string_table_.StringToInt(db, Serialize(action.other()), &id)) |
| 214 return false; |
| 215 matched_values.push_back(id); |
| 216 |
| 217 // Assume there is an existing row for this action, and try to update the |
| 218 // count. |
| 219 sql::Statement update_statement(db->GetCachedStatement( |
| 220 sql::StatementID(SQL_FROM_HERE), update_str.c_str())); |
| 221 update_statement.BindInt64(0, action.time().ToInternalValue()); |
| 222 update_statement.BindInt64(1, day_start.ToInternalValue()); |
| 223 update_statement.BindInt64(2, next_day.ToInternalValue()); |
| 224 for (size_t j = 0; j < matched_values.size(); j++) { |
| 225 update_statement.BindInt64(j + 3, matched_values[j]); |
| 226 } |
| 227 if (!update_statement.Run()) |
| 228 return false; |
| 229 |
| 230 // Check if the update succeeded (was the count of updated rows non-zero)? |
| 231 // If it failed because no matching row existed, fall back to inserting a |
| 232 // new record. |
| 233 if (db->GetLastChangeCount() > 0) { |
| 234 continue; |
| 235 } |
| 236 sql::Statement insert_statement(db->GetCachedStatement( |
| 237 sql::StatementID(SQL_FROM_HERE), insert_str.c_str())); |
| 238 insert_statement.BindInt64(0, action.time().ToInternalValue()); |
| 239 for (size_t j = 0; j < matched_values.size(); j++) |
| 240 insert_statement.BindInt64(j + 1, matched_values[j]); |
| 241 if (!insert_statement.Run()) |
| 242 return false; |
| 243 } |
| 244 |
| 245 LOG(INFO) << "Committing counting policy flush"; |
| 246 if (!transaction.Commit()) |
| 247 return false; |
| 248 LOG(INFO) << "Finished commit"; |
| 249 return true; |
| 250 } |
| 251 |
// The two methods below are compiled out by the #if 0 guard; they are kept
// only as a sketch and have no effect on the built binary.
#if 0
// Strips arguments from |action| and schedules QueueAction with the result.
void CountingPolicy::ProcessAction(scoped_refptr<Action> action) {
  // TODO(mvrable): Right now this argument stripping updates the Action object
  // in place, which isn't good if there are other users of the object. When
  // database writing is moved to policy class, the modifications should be
  // made locally.
  action = ProcessArguments(action);
  ScheduleAndForget(this, &CountingPolicy::QueueAction, action);
}

// Unfinished: does nothing beyond declaring an unused local map when the
// database is valid.
void CountingPolicy::QueueAction(scoped_refptr<Action> action) {
  if (activity_database()->is_db_valid()) {
    std::map<scoped_refptr<Action>, int> queued_writes;
  }
}
#endif
| 269 |
| 270 } // namespace extensions |
| OLD | NEW |