Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(566)

Side by Side Diff: chrome/browser/extensions/activity_log/counting_policy.cc

Issue 21646004: Compressed activity log database storage (Closed) Base URL: http://git.chromium.org/chromium/src.git@refactor-cleanups
Patch Set: Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // A policy for storing activity log data to a database that performs
6 // aggregation to reduce the size of the database. The database layout is
7 // nearly the same as FullStreamUIPolicy, which stores a complete log, with a
8 // few changes:
9 // - a "count" column is added to track how many log records were merged
10 // together into this row
11 // - the "time" column measures the most recent time that the current row was
12 // updated
13 // When writing a record, if a row already exists in which all other columns
14 // (extension_id, action_type, api_name, args, urls, etc.) match, and that
15 // row's time falls within the same local day as the new record, then the count
16 // field on the old row is incremented. Otherwise, a new row is written.
17 //
18 // For many text columns, repeated strings are compressed by moving string
19 // storage to a separate table ("string_ids") and storing only an identifier in
20 // the logging table. For example, if the api_name_x column contained the
21 // value 4 and the string_ids table contained a row with primary key 4 and
22 // value 'tabs.query', then the api_name field should be taken to have the
23 // value 'tabs.query'. Each column ending with "_x" is compressed in this way.
24 // All lookups are to the string_ids table, except for the page_url_x and
25 // arg_url_x columns, which are converted via the url_ids table (this
26 // separation of URL values is to help simplify history clearing).
27 //
28 // For strings which are unique (only used once), this scheme will increase
29 // storage requirements; each string will appear twice, once in a string table
30 // and once in a string table index. For strings which appear twice storage
31 // should be approximately break-even. For strings which appear more than
32 // twice, this compression will likely save space.
33 //
34 // The activitylog_uncompressed view allows for simpler reading of the activity
35 // log contents with identifiers already translated to string values.
36
37 // TODO(mvrable): Some additional tasks that need to be done for this policy:
38 // * Clean out old activity log records (say, those older than a couple of
39 // days, perhaps configurable or adaptive based on database size).
40 // * Prune strings from the string tables if they are no longer in use.
41 // * Analyze storage requirements in more detail.
42 // * Perhaps add heuristics for strings likely to be unique, and directly
43 // store those in the activitylog_compressed table?
44 // * Factor out common code that should be shared between policies, and make
45 // sure all sanitization (for example, of URLs) is done uniformly.
46
47 #include "chrome/browser/extensions/activity_log/counting_policy.h"
48
49 #include <map>
50 #include <string>
51 #include <vector>
52
53 #include "base/json/json_string_value_serializer.h"
54 #include "base/strings/string_util.h"
55 #include "base/strings/stringprintf.h"
56
57 namespace {
58
59 // TODO(mvrable): Consider placing this in a common location.
60 std::string Serialize(const base::Value* value) {
61 std::string value_as_text;
62 if (!value) {
63 value_as_text = "";
64 } else {
65 JSONStringValueSerializer serializer(&value_as_text);
66 serializer.SerializeAndOmitBinaryValues(*value);
67 }
68 return value_as_text;
69 }
70
71 // Given a day (timestamp at local midnight), compute the start of the
72 // following day. To allow for time zone changes, add more than a day then
73 // round down.
74 base::Time NextDay(const base::Time& day) {
75 return (day + base::TimeDelta::FromHours(36)).LocalMidnight();
76 }
77
78 } // namespace
79
80 namespace extensions {
81
82 const char* CountingPolicy::kTableName = "activitylog_compressed";
83 const char* CountingPolicy::kTableContentFields[] = {
84 "count", "extension_id_x", "time", "action_type", "api_name_x", "args_x",
85 "page_url_x", "page_title_x", "arg_url_x", "other_x"};
86 const char* CountingPolicy::kTableFieldTypes[] = {
87 "INTEGER NOT NULL DEFAULT 1", "INTEGER NOT NULL", "INTEGER", "INTEGER",
88 "INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER",
89 "INTEGER"};
90
// SQL that (re)creates the activitylog_uncompressed view.  The view exposes
// each activitylog_compressed row with its "_x" identifier columns translated
// back to text: page_url and arg_url are resolved through url_ids, and every
// other translated column through string_ids.  The x1..x7 join aliases follow
// the order of the translated columns in the SELECT list.
static const char kPolicyTableView[] =
    // Replace any previous definition of the view.
    "DROP VIEW IF EXISTS activitylog_uncompressed;\n"
    "CREATE VIEW activitylog_uncompressed AS\n"
    // count, time and action_type are passed through unchanged.
    "SELECT count,\n"
    " x1.value AS extension_id,\n"
    " time,\n"
    " action_type,\n"
    " x2.value AS api_name,\n"
    " x3.value AS args,\n"
    " x4.value AS page_url,\n"
    " x5.value AS page_title,\n"
    " x6.value AS arg_url,\n"
    " x7.value AS other\n"
    "FROM activitylog_compressed\n"
    // One LEFT JOIN per translated column.
    " LEFT JOIN string_ids AS x1 ON (x1.id = extension_id_x)\n"
    " LEFT JOIN string_ids AS x2 ON (x2.id = api_name_x)\n"
    " LEFT JOIN string_ids AS x3 ON (x3.id = args_x)\n"
    " LEFT JOIN url_ids AS x4 ON (x4.id = page_url_x)\n"
    " LEFT JOIN string_ids AS x5 ON (x5.id = page_title_x)\n"
    " LEFT JOIN url_ids AS x6 ON (x6.id = arg_url_x)\n"
    " LEFT JOIN string_ids AS x7 ON (x7.id = other_x)\n";
112
113 CountingPolicy::CountingPolicy(Profile* profile)
114 : StreamWithoutArgsUIPolicy(profile) {}
115
116 CountingPolicy::~CountingPolicy() {}
117
118 bool CountingPolicy::InitDatabase(sql::Connection* db) {
119 // TODO(mvrable): Add logic to drop old database tables.
120
121 if (!string_table_.Initialize(db))
122 return false;
123 if (!url_table_.Initialize(db))
124 return false;
125
126 // Create the unified activity log entry table.
127 if (!ActivityDatabase::InitializeTable(db,
128 kTableName,
129 kTableContentFields,
130 kTableFieldTypes,
131 arraysize(kTableContentFields)))
132 return false;
133
134 // Create a view for easily accessing the uncompressed form of the data.
135 return db->Execute(kPolicyTableView);
136 }
137
138 bool CountingPolicy::FlushDatabase(sql::Connection* db) {
139 // Columns that must match exactly for database rows to be coalesced.
140 static const char* matched_columns[] = {
141 "extension_id_x", "action_type", "api_name_x", "args_x", "page_url_x",
142 "page_title_x", "arg_url_x", "other_x"};
143 LOG(INFO) << "Starting counting policy flush";
144 Action::ActionVector queue;
145 queue.swap(queued_actions_);
146
147 if (queue.empty())
148 return true;
149
150 sql::Transaction transaction(db);
151 if (!transaction.Begin())
152 return false;
153
154 std::string insert_str =
155 "INSERT INTO " + std::string(kTableName) + "(count, time";
156 std::string update_str =
157 "UPDATE " + std::string(kTableName) +
158 " SET count = count + 1, time = max(?, time)"
159 " WHERE time >= ? AND time < ?";
160
161 for (size_t i = 0; i < arraysize(matched_columns); i++) {
162 insert_str =
163 base::StringPrintf("%s, %s", insert_str.c_str(), matched_columns[i]);
164 update_str = base::StringPrintf(
165 "%s AND %s = ?", update_str.c_str(), matched_columns[i]);
166 }
167 insert_str += ") VALUES (1, ?";
168 for (size_t i = 0; i < arraysize(matched_columns); i++) {
169 insert_str += ", ?";
170 }
171 insert_str += ")";
172
173 // TODO(mvrable): URL sanitization or summarization.
174
175 Action::ActionVector::size_type i;
176 for (i = 0; i != queue.size(); ++i) {
177 const Action& action = *queue[i];
178
179 base::Time day_start = action.time().LocalMidnight();
180 base::Time next_day = NextDay(day_start);
181
182 // The contents in values must match up with fields in matched_columns.
183 int64 id;
184 std::vector<int64> matched_values;
185
186 if (!string_table_.StringToInt(db, action.extension_id(), &id))
187 return false;
188 matched_values.push_back(id);
189
190 matched_values.push_back(static_cast<int>(action.action_type()));
191
192 if (!string_table_.StringToInt(db, action.api_name(), &id))
193 return false;
194 matched_values.push_back(id);
195
196 if (!string_table_.StringToInt(db, Serialize(action.args()), &id))
197 return false;
198 matched_values.push_back(id);
199
200 if (!url_table_.StringToInt(db, action.page_url().spec(), &id))
201 return false;
202 matched_values.push_back(id);
203
204 // TODO(mvrable): Create a title_table_?
205 if (!string_table_.StringToInt(db, action.page_title(), &id))
206 return false;
207 matched_values.push_back(id);
208
209 if (!url_table_.StringToInt(db, action.arg_url().spec(), &id))
210 return false;
211 matched_values.push_back(id);
212
213 if (!string_table_.StringToInt(db, Serialize(action.other()), &id))
214 return false;
215 matched_values.push_back(id);
216
217 // Assume there is an existing row for this action, and try to update the
218 // count.
219 sql::Statement update_statement(db->GetCachedStatement(
220 sql::StatementID(SQL_FROM_HERE), update_str.c_str()));
221 update_statement.BindInt64(0, action.time().ToInternalValue());
222 update_statement.BindInt64(1, day_start.ToInternalValue());
223 update_statement.BindInt64(2, next_day.ToInternalValue());
224 for (size_t j = 0; j < matched_values.size(); j++) {
225 update_statement.BindInt64(j + 3, matched_values[j]);
226 }
227 if (!update_statement.Run())
228 return false;
229
230 // Check if the update succeeded (was the count of updated rows non-zero)?
231 // If it failed because no matching row existed, fall back to inserting a
232 // new record.
233 if (db->GetLastChangeCount() > 0) {
234 continue;
235 }
236 sql::Statement insert_statement(db->GetCachedStatement(
237 sql::StatementID(SQL_FROM_HERE), insert_str.c_str()));
238 insert_statement.BindInt64(0, action.time().ToInternalValue());
239 for (size_t j = 0; j < matched_values.size(); j++)
240 insert_statement.BindInt64(j + 1, matched_values[j]);
241 if (!insert_statement.Run())
242 return false;
243 }
244
245 LOG(INFO) << "Committing counting policy flush";
246 if (!transaction.Commit())
247 return false;
248 LOG(INFO) << "Finished commit";
249 return true;
250 }
251
// Disabled work in progress: everything between #if 0 and #endif is excluded
// from compilation.
#if 0
// Strips arguments from |action| and schedules it to be queued.
void CountingPolicy::ProcessAction(scoped_refptr<Action> action) {
  // TODO(mvrable): Right now this argument stripping updates the Action object
  // in place, which isn't good if there are other users of the object.  When
  // database writing is moved to policy class, the modifications should be
  // made locally.
  action = ProcessArguments(action);
  ScheduleAndForget(this, &CountingPolicy::QueueAction, action);
}

// Unfinished: currently only checks that the database is valid and declares an
// (unused) map from action to count.
void CountingPolicy::QueueAction(scoped_refptr<Action> action) {
  if (!activity_database()->is_db_valid())
    return;

  std::map<scoped_refptr<Action>, int> queued_writes;
}
#endif
269
270 } // namespace extensions
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698