| OLD | NEW |
| (Empty) | |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 // A policy for storing activity log data to a database that performs |
| 6 // aggregation to reduce the size of the database. The database layout is |
| 7 // nearly the same as FullStreamUIPolicy, which stores a complete log, with a |
| 8 // few changes: |
| 9 // - a "count" column is added to track how many log records were merged |
| 10 // together into this row |
| 11 // - the "time" column measures the most recent time that the current row was |
| 12 // updated |
| 13 // When writing a record, if a row already exists where all other columns |
| 14 // (extension_id, action_type, api_name, args, urls, etc.) all match, and the |
| 15 // previous time falls within today (the current time), then the count field on |
| 16 // the old row is incremented. Otherwise, a new row is written. |
| 17 // |
| 18 // For many text columns, repeated strings are compressed by moving string |
| 19 // storage to a separate table ("string_ids") and storing only an identifier in |
| 20 // the logging table. For example, if the api_name_x column contained the |
| 21 // value 4 and the string_ids table contained a row with primary key 4 and |
| 22 // value 'tabs.query', then the api_name field should be taken to have the |
| 23 // value 'tabs.query'. Each column ending with "_x" is compressed in this way. |
| 24 // All lookups are to the string_ids table, except for the page_url_x and |
| 25 // arg_url_x columns, which are converted via the url_ids table (this |
| 26 // separation of URL values is to help simplify history clearing). |
| 27 // |
| 28 // The activitylog_uncompressed view allows for simpler reading of the activity |
| 29 // log contents with identifiers already translated to string values. |
| 30 |
| 31 #include "chrome/browser/extensions/activity_log/counting_policy.h" |
| 32 |
| 33 #include <map> |
| 34 #include <string> |
| 35 #include <vector> |
| 36 |
| 37 #include "base/callback.h" |
| 38 #include "base/files/file_path.h" |
| 39 #include "base/json/json_reader.h" |
| 40 #include "base/json/json_string_value_serializer.h" |
| 41 #include "base/strings/string_util.h" |
| 42 #include "base/strings/stringprintf.h" |
| 43 #include "chrome/common/chrome_constants.h" |
| 44 |
| 45 using content::BrowserThread; |
| 46 |
| 47 namespace { |
| 48 |
| 49 // Delay between cleaning passes (to delete old action records) through the |
| 50 // database. |
| 51 const int kCleaningDelayInHours = 12; |
| 52 |
| 53 // We should log the arguments to these API calls. Be careful when |
| 54 // constructing this whitelist to not keep arguments that might compromise |
| 55 // privacy by logging too much data to the activity log. |
| 56 // |
| 57 // TODO(mvrable): The contents of this whitelist should be reviewed and |
| 58 // expanded as needed. |
| 59 const char* kAlwaysLog[] = {"extension.connect", "extension.sendMessage", |
| 60 "tabs.executeScript", "tabs.insertCSS"}; |
| 61 |
| 62 // Columns in the main database table. See the file-level comment for a |
| 63 // discussion of how data is stored and the meanings of the _x columns. |
| 64 const char* kTableContentFields[] = { |
| 65 "count", "extension_id_x", "time", "action_type", "api_name_x", "args_x", |
| 66 "page_url_x", "page_title_x", "arg_url_x", "other_x"}; |
| 67 const char* kTableFieldTypes[] = { |
| 68 "INTEGER NOT NULL DEFAULT 1", "INTEGER NOT NULL", "INTEGER", "INTEGER", |
| 69 "INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER", |
| 70 "INTEGER"}; |
| 71 |
| 72 // Miscellaneous SQL commands for initializing the database; these should be |
| 73 // idempotent. |
| 74 static const char kPolicyMiscSetup[] = |
| 75 // The activitylog_uncompressed view performs string lookups for simpler |
| 76 // access to the log data. |
| 77 "DROP VIEW IF EXISTS activitylog_uncompressed;\n" |
| 78 "CREATE VIEW activitylog_uncompressed AS\n" |
| 79 "SELECT count,\n" |
| 80 " x1.value AS extension_id,\n" |
| 81 " time,\n" |
| 82 " action_type,\n" |
| 83 " x2.value AS api_name,\n" |
| 84 " x3.value AS args,\n" |
| 85 " x4.value AS page_url,\n" |
| 86 " x5.value AS page_title,\n" |
| 87 " x6.value AS arg_url,\n" |
| 88 " x7.value AS other\n" |
| 89 "FROM activitylog_compressed\n" |
| 90 " LEFT JOIN string_ids AS x1 ON (x1.id = extension_id_x)\n" |
| 91 " LEFT JOIN string_ids AS x2 ON (x2.id = api_name_x)\n" |
| 92 " LEFT JOIN string_ids AS x3 ON (x3.id = args_x)\n" |
| 93 " LEFT JOIN url_ids AS x4 ON (x4.id = page_url_x)\n" |
| 94 " LEFT JOIN string_ids AS x5 ON (x5.id = page_title_x)\n" |
| 95 " LEFT JOIN url_ids AS x6 ON (x6.id = arg_url_x)\n" |
| 96 " LEFT JOIN string_ids AS x7 ON (x7.id = other_x);\n" |
| 97 // An index on all fields except count and time: all the fields that aren't |
| 98 // changed when incrementing a count. This should accelerate finding the |
| 99 // rows to update (at worst several rows will need to be checked to find |
| 100 // the one in the right time range). |
| 101 "CREATE INDEX IF NOT EXISTS activitylog_compressed_index\n" |
| 102 "ON activitylog_compressed(extension_id_x, action_type, api_name_x,\n" |
| 103 " args_x, page_url_x, page_title_x, arg_url_x, other_x)"; |
| 104 |
| 105 // SQL statements to clean old, unused entries out of the string and URL id |
| 106 // tables. |
| 107 static const char kStringTableCleanup[] = |
| 108 "DELETE FROM string_ids WHERE id NOT IN\n" |
| 109 "(SELECT extension_id_x FROM activitylog_compressed\n" |
| 110 " UNION SELECT api_name_x FROM activitylog_compressed\n" |
| 111 " UNION SELECT args_x FROM activitylog_compressed\n" |
| 112 " UNION SELECT page_title_x FROM activitylog_compressed\n" |
| 113 " UNION SELECT other_x FROM activitylog_compressed)"; |
| 114 static const char kUrlTableCleanup[] = |
| 115 "DELETE FROM url_ids WHERE id NOT IN\n" |
| 116 "(SELECT page_url_x FROM activitylog_compressed\n" |
| 117 " UNION SELECT arg_url_x FROM activitylog_compressed)"; |
| 118 |
| 119 } // namespace |
| 120 |
| 121 namespace extensions { |
| 122 |
| 123 // A specialized Action subclass which is used to represent an action read from |
| 124 // the database with a corresponding count. |
| 125 class CountedAction : public Action { |
| 126 public: |
| 127 CountedAction(const std::string& extension_id, |
| 128 const base::Time& time, |
| 129 const ActionType action_type, |
| 130 const std::string& api_name) |
| 131 : Action(extension_id, time, action_type, api_name) {} |
| 132 |
| 133 // Number of merged records for this action. |
| 134 int count() const { return count_; } |
| 135 void set_count(int count) { count_ = count; } |
| 136 |
| 137 virtual std::string PrintForDebug() const OVERRIDE; |
| 138 |
| 139 protected: |
| 140 virtual ~CountedAction() {} |
| 141 |
| 142 private: |
| 143 int count_; |
| 144 }; |
| 145 |
| 146 std::string CountedAction::PrintForDebug() const { |
| 147 return base::StringPrintf( |
| 148 "%s COUNT=%d", Action::PrintForDebug().c_str(), count()); |
| 149 } |
| 150 |
| 151 const char* CountingPolicy::kTableName = "activitylog_compressed"; |
| 152 const char* CountingPolicy::kReadViewName = "activitylog_uncompressed"; |
| 153 |
| 154 CountingPolicy::CountingPolicy(Profile* profile) |
| 155 : ActivityLogDatabasePolicy( |
| 156 profile, |
| 157 base::FilePath(chrome::kExtensionActivityLogFilename)), |
| 158 string_table_("string_ids"), |
| 159 url_table_("url_ids"), |
| 160 retention_time_(base::TimeDelta::FromHours(60)) { |
| 161 for (size_t i = 0; i < arraysize(kAlwaysLog); i++) { |
| 162 api_arg_whitelist_.insert(kAlwaysLog[i]); |
| 163 } |
| 164 } |
| 165 |
| 166 CountingPolicy::~CountingPolicy() {} |
| 167 |
| 168 bool CountingPolicy::InitDatabase(sql::Connection* db) { |
| 169 if (!Util::DropObsoleteTables(db)) |
| 170 return false; |
| 171 |
| 172 if (!string_table_.Initialize(db)) |
| 173 return false; |
| 174 if (!url_table_.Initialize(db)) |
| 175 return false; |
| 176 |
| 177 // Create the unified activity log entry table. |
| 178 if (!ActivityDatabase::InitializeTable(db, |
| 179 kTableName, |
| 180 kTableContentFields, |
| 181 kTableFieldTypes, |
| 182 arraysize(kTableContentFields))) |
| 183 return false; |
| 184 |
| 185 // Create a view for easily accessing the uncompressed form of the data, and |
| 186 // any necessary indexes if needed. |
| 187 return db->Execute(kPolicyMiscSetup); |
| 188 } |
| 189 |
| 190 void CountingPolicy::ProcessAction(scoped_refptr<Action> action) { |
| 191 ScheduleAndForget(this, &CountingPolicy::QueueAction, action); |
| 192 } |
| 193 |
| 194 void CountingPolicy::QueueAction(scoped_refptr<Action> action) { |
| 195 if (activity_database()->is_db_valid()) { |
| 196 action = action->Clone(); |
| 197 Util::StripPrivacySensitiveFields(action); |
| 198 Util::StripArguments(api_arg_whitelist_, action); |
| 199 queued_actions_.push_back(action); |
| 200 activity_database()->AdviseFlush(queued_actions_.size()); |
| 201 } |
| 202 } |
| 203 |
| 204 bool CountingPolicy::FlushDatabase(sql::Connection* db) { |
| 205 // Columns that must match exactly for database rows to be coalesced. |
| 206 static const char* matched_columns[] = { |
| 207 "extension_id_x", "action_type", "api_name_x", "args_x", "page_url_x", |
| 208 "page_title_x", "arg_url_x", "other_x"}; |
| 209 Action::ActionVector queue; |
| 210 queue.swap(queued_actions_); |
| 211 |
| 212 // Whether to clean old records out of the activity log database. Do this |
| 213 // much less frequently than database flushes since it is expensive, but |
| 214 // always check on the first database flush (since there might be a large |
| 215 // amount of data to clear). |
| 216 bool clean_database = (last_database_cleaning_time_.is_null() || |
| 217 Now() - last_database_cleaning_time_ > |
| 218 base::TimeDelta::FromHours(kCleaningDelayInHours)); |
| 219 |
| 220 if (queue.empty() && !clean_database) |
| 221 return true; |
| 222 |
| 223 sql::Transaction transaction(db); |
| 224 if (!transaction.Begin()) |
| 225 return false; |
| 226 |
| 227 std::string insert_str = |
| 228 "INSERT INTO " + std::string(kTableName) + "(count, time"; |
| 229 std::string update_str = |
| 230 "UPDATE " + std::string(kTableName) + |
| 231 " SET count = count + 1, time = max(?, time)" |
| 232 " WHERE time >= ? AND time < ?"; |
| 233 |
| 234 for (size_t i = 0; i < arraysize(matched_columns); i++) { |
| 235 insert_str = |
| 236 base::StringPrintf("%s, %s", insert_str.c_str(), matched_columns[i]); |
| 237 update_str = base::StringPrintf( |
| 238 "%s AND %s IS ?", update_str.c_str(), matched_columns[i]); |
| 239 } |
| 240 insert_str += ") VALUES (1, ?"; |
| 241 for (size_t i = 0; i < arraysize(matched_columns); i++) { |
| 242 insert_str += ", ?"; |
| 243 } |
| 244 insert_str += ")"; |
| 245 |
| 246 Action::ActionVector::size_type i; |
| 247 for (i = 0; i != queue.size(); ++i) { |
| 248 const Action& action = *queue[i]; |
| 249 |
| 250 base::Time day_start = action.time().LocalMidnight(); |
| 251 base::Time next_day = Util::AddDays(day_start, 1); |
| 252 |
| 253 // The contents in values must match up with fields in matched_columns. A |
| 254 // value of -1 is used to encode a null database value. |
| 255 int64 id; |
| 256 std::vector<int64> matched_values; |
| 257 |
| 258 if (!string_table_.StringToInt(db, action.extension_id(), &id)) |
| 259 return false; |
| 260 matched_values.push_back(id); |
| 261 |
| 262 matched_values.push_back(static_cast<int>(action.action_type())); |
| 263 |
| 264 if (!string_table_.StringToInt(db, action.api_name(), &id)) |
| 265 return false; |
| 266 matched_values.push_back(id); |
| 267 |
| 268 if (action.args()) { |
| 269 std::string args = Util::Serialize(action.args()); |
| 270 // TODO(mvrable): For now, truncate long argument lists. This is a |
| 271 // workaround for excessively-long values coming from DOM logging. When |
| 272 // the V8ValueConverter is fixed to return more reasonable values, we can |
| 273 // drop the truncation. |
| 274 if (args.length() > 10000) { |
| 275 args = "[\"<too_large>\"]"; |
| 276 } |
| 277 if (!string_table_.StringToInt(db, args, &id)) |
| 278 return false; |
| 279 matched_values.push_back(id); |
| 280 } else { |
| 281 matched_values.push_back(-1); |
| 282 } |
| 283 |
| 284 std::string page_url_string = action.SerializePageUrl(); |
| 285 if (!page_url_string.empty()) { |
| 286 if (!url_table_.StringToInt(db, page_url_string, &id)) |
| 287 return false; |
| 288 matched_values.push_back(id); |
| 289 } else { |
| 290 matched_values.push_back(-1); |
| 291 } |
| 292 |
| 293 // TODO(mvrable): Create a title_table_? |
| 294 if (!action.page_title().empty()) { |
| 295 if (!string_table_.StringToInt(db, action.page_title(), &id)) |
| 296 return false; |
| 297 matched_values.push_back(id); |
| 298 } else { |
| 299 matched_values.push_back(-1); |
| 300 } |
| 301 |
| 302 std::string arg_url_string = action.SerializeArgUrl(); |
| 303 if (!arg_url_string.empty()) { |
| 304 if (!url_table_.StringToInt(db, arg_url_string, &id)) |
| 305 return false; |
| 306 matched_values.push_back(id); |
| 307 } else { |
| 308 matched_values.push_back(-1); |
| 309 } |
| 310 |
| 311 if (action.other()) { |
| 312 if (!string_table_.StringToInt(db, Util::Serialize(action.other()), &id)) |
| 313 return false; |
| 314 matched_values.push_back(id); |
| 315 } else { |
| 316 matched_values.push_back(-1); |
| 317 } |
| 318 |
| 319 // Assume there is an existing row for this action, and try to update the |
| 320 // count. |
| 321 sql::Statement update_statement(db->GetCachedStatement( |
| 322 sql::StatementID(SQL_FROM_HERE), update_str.c_str())); |
| 323 update_statement.BindInt64(0, action.time().ToInternalValue()); |
| 324 update_statement.BindInt64(1, day_start.ToInternalValue()); |
| 325 update_statement.BindInt64(2, next_day.ToInternalValue()); |
| 326 for (size_t j = 0; j < matched_values.size(); j++) { |
| 327 if (matched_values[j] == -1) |
| 328 update_statement.BindNull(j + 3); |
| 329 else |
| 330 update_statement.BindInt64(j + 3, matched_values[j]); |
| 331 } |
| 332 if (!update_statement.Run()) |
| 333 return false; |
| 334 |
| 335 // Check if the update succeeded (was the count of updated rows non-zero)? |
| 336 // If it failed because no matching row existed, fall back to inserting a |
| 337 // new record. |
| 338 if (db->GetLastChangeCount() > 0) { |
| 339 if (db->GetLastChangeCount() > 1) { |
| 340 LOG(WARNING) << "Found and updated multiple rows in the activity log " |
| 341 << "database; counts may be off!"; |
| 342 } |
| 343 continue; |
| 344 } |
| 345 sql::Statement insert_statement(db->GetCachedStatement( |
| 346 sql::StatementID(SQL_FROM_HERE), insert_str.c_str())); |
| 347 insert_statement.BindInt64(0, action.time().ToInternalValue()); |
| 348 for (size_t j = 0; j < matched_values.size(); j++) { |
| 349 if (matched_values[j] == -1) |
| 350 update_statement.BindNull(j + 1); |
| 351 else |
| 352 insert_statement.BindInt64(j + 1, matched_values[j]); |
| 353 } |
| 354 if (!insert_statement.Run()) |
| 355 return false; |
| 356 } |
| 357 |
| 358 if (clean_database) { |
| 359 base::Time cutoff = (Now() - retention_time()).LocalMidnight(); |
| 360 if (!CleanOlderThan(db, cutoff)) |
| 361 return false; |
| 362 last_database_cleaning_time_ = Now(); |
| 363 } |
| 364 |
| 365 if (!transaction.Commit()) |
| 366 return false; |
| 367 |
| 368 return true; |
| 369 } |
| 370 |
| 371 scoped_ptr<Action::ActionVector> CountingPolicy::DoReadData( |
| 372 const std::string& extension_id, |
| 373 const int days_ago) { |
| 374 // Ensure data is flushed to the database first so that we query over all |
| 375 // data. |
| 376 activity_database()->AdviseFlush(ActivityDatabase::kFlushImmediately); |
| 377 |
| 378 DCHECK_GE(days_ago, 0); |
| 379 scoped_ptr<Action::ActionVector> actions(new Action::ActionVector()); |
| 380 |
| 381 sql::Connection* db = GetDatabaseConnection(); |
| 382 if (!db) { |
| 383 return actions.Pass(); |
| 384 } |
| 385 |
| 386 int64 early_bound; |
| 387 int64 late_bound; |
| 388 Util::ComputeDatabaseTimeBounds(Now(), days_ago, &early_bound, &late_bound); |
| 389 std::string query_str = base::StringPrintf( |
| 390 "SELECT time, action_type, api_name, args, page_url, page_title, " |
| 391 "arg_url, other, count " |
| 392 "FROM %s WHERE extension_id=? AND time>? AND time<=? " |
| 393 "ORDER BY time DESC", |
| 394 kReadViewName); |
| 395 sql::Statement query(db->GetCachedStatement(SQL_FROM_HERE, |
| 396 query_str.c_str())); |
| 397 query.BindString(0, extension_id); |
| 398 query.BindInt64(1, early_bound); |
| 399 query.BindInt64(2, late_bound); |
| 400 |
| 401 while (query.is_valid() && query.Step()) { |
| 402 scoped_refptr<CountedAction> action = |
| 403 new CountedAction(extension_id, |
| 404 base::Time::FromInternalValue(query.ColumnInt64(0)), |
| 405 static_cast<Action::ActionType>(query.ColumnInt(1)), |
| 406 query.ColumnString(2)); |
| 407 |
| 408 if (query.ColumnType(3) != sql::COLUMN_TYPE_NULL) { |
| 409 scoped_ptr<Value> parsed_value( |
| 410 base::JSONReader::Read(query.ColumnString(3))); |
| 411 if (parsed_value && parsed_value->IsType(Value::TYPE_LIST)) { |
| 412 action->set_args( |
| 413 make_scoped_ptr(static_cast<ListValue*>(parsed_value.release()))); |
| 414 } else { |
| 415 LOG(WARNING) << "Unable to parse args: '" << query.ColumnString(3) |
| 416 << "'"; |
| 417 } |
| 418 } |
| 419 |
| 420 action->ParsePageUrl(query.ColumnString(4)); |
| 421 action->set_page_title(query.ColumnString(5)); |
| 422 action->ParseArgUrl(query.ColumnString(6)); |
| 423 |
| 424 if (query.ColumnType(7) != sql::COLUMN_TYPE_NULL) { |
| 425 scoped_ptr<Value> parsed_value( |
| 426 base::JSONReader::Read(query.ColumnString(7))); |
| 427 if (parsed_value && parsed_value->IsType(Value::TYPE_DICTIONARY)) { |
| 428 action->set_other(make_scoped_ptr( |
| 429 static_cast<DictionaryValue*>(parsed_value.release()))); |
| 430 } else { |
| 431 LOG(WARNING) << "Unable to parse other: '" << query.ColumnString(7) |
| 432 << "'"; |
| 433 } |
| 434 } |
| 435 |
| 436 action->set_count(query.ColumnInt(8)); |
| 437 |
| 438 actions->push_back(action); |
| 439 } |
| 440 |
| 441 return actions.Pass(); |
| 442 } |
| 443 |
| 444 void CountingPolicy::ReadData( |
| 445 const std::string& extension_id, |
| 446 const int day, |
| 447 const base::Callback<void(scoped_ptr<Action::ActionVector>)>& callback) { |
| 448 BrowserThread::PostTaskAndReplyWithResult( |
| 449 BrowserThread::DB, |
| 450 FROM_HERE, |
| 451 base::Bind(&CountingPolicy::DoReadData, |
| 452 base::Unretained(this), |
| 453 extension_id, |
| 454 day), |
| 455 callback); |
| 456 } |
| 457 |
| 458 void CountingPolicy::OnDatabaseFailure() { |
| 459 queued_actions_.clear(); |
| 460 } |
| 461 |
| 462 void CountingPolicy::OnDatabaseClose() { |
| 463 delete this; |
| 464 } |
| 465 |
| 466 // Cleans old records from the activity log database. |
| 467 bool CountingPolicy::CleanOlderThan(sql::Connection* db, |
| 468 const base::Time& cutoff) { |
| 469 std::string clean_statement = |
| 470 "DELETE FROM " + std::string(kTableName) + " WHERE time < ?"; |
| 471 sql::Statement cleaner(db->GetCachedStatement(sql::StatementID(SQL_FROM_HERE), |
| 472 clean_statement.c_str())); |
| 473 cleaner.BindInt64(0, cutoff.ToInternalValue()); |
| 474 if (!cleaner.Run()) |
| 475 return false; |
| 476 return CleanStringTables(db); |
| 477 } |
| 478 |
| 479 // Cleans unused interned strings from the database. This should be run after |
| 480 // deleting rows from the main log table to clean out stale values. |
| 481 bool CountingPolicy::CleanStringTables(sql::Connection* db) { |
| 482 sql::Statement cleaner1(db->GetCachedStatement( |
| 483 sql::StatementID(SQL_FROM_HERE), kStringTableCleanup)); |
| 484 if (!cleaner1.Run()) |
| 485 return false; |
| 486 if (db->GetLastChangeCount() > 0) |
| 487 string_table_.ClearCache(); |
| 488 |
| 489 sql::Statement cleaner2(db->GetCachedStatement( |
| 490 sql::StatementID(SQL_FROM_HERE), kUrlTableCleanup)); |
| 491 if (!cleaner2.Run()) |
| 492 return false; |
| 493 if (db->GetLastChangeCount() > 0) |
| 494 url_table_.ClearCache(); |
| 495 |
| 496 return true; |
| 497 } |
| 498 |
| 499 void CountingPolicy::Close() { |
| 500 // The policy object should have never been created if there's no DB thread. |
| 501 DCHECK(BrowserThread::IsMessageLoopValid(BrowserThread::DB)); |
| 502 ScheduleAndForget(activity_database(), &ActivityDatabase::Close); |
| 503 } |
| 504 |
| 505 } // namespace extensions |
| OLD | NEW |