Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(50)

Side by Side Diff: chrome/common/metrics/metrics_log_base.cc

Issue 9232071: Upload UMA data using protocol buffers. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebase Created 8 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/common/metrics/metrics_log_base.h" 5 #include "chrome/common/metrics/metrics_log_base.h"
6 6
7 #include "base/base64.h" 7 #include "base/base64.h"
8 #include "base/basictypes.h" 8 #include "base/basictypes.h"
9 #include "base/md5.h" 9 #include "base/md5.h"
10 #include "base/perftimer.h" 10 #include "base/perftimer.h"
11 #include "base/string_number_conversions.h" 11 #include "base/string_number_conversions.h"
12 #include "base/sys_byteorder.h"
12 #include "base/sys_info.h" 13 #include "base/sys_info.h"
13 #include "base/third_party/nspr/prtime.h" 14 #include "base/third_party/nspr/prtime.h"
14 #include "base/utf_string_conversions.h" 15 #include "base/utf_string_conversions.h"
15 #include "chrome/common/logging_chrome.h" 16 #include "chrome/common/logging_chrome.h"
17 #include "chrome/common/metrics/proto/histogram_event.pb.h"
18 #include "chrome/common/metrics/proto/user_action_event.pb.h"
16 #include "libxml/xmlwriter.h" 19 #include "libxml/xmlwriter.h"
17 20
18 #define OPEN_ELEMENT_FOR_SCOPE(name) ScopedElement scoped_element(this, name) 21 #define OPEN_ELEMENT_FOR_SCOPE(name) ScopedElement scoped_element(this, name)
19 22
20 using base::Histogram; 23 using base::Histogram;
21 using base::Time; 24 using base::Time;
22 using base::TimeDelta; 25 using base::TimeDelta;
23 26 using metrics::HistogramEventProto;
24 // http://blogs.msdn.com/oldnewthing/archive/2004/10/25/247180.aspx 27 using metrics::UserActionEventProto;
25 #if defined(OS_WIN)
26 extern "C" IMAGE_DOS_HEADER __ImageBase;
27 #endif
28 28
29 namespace { 29 namespace {
30 30
31 // libxml take xmlChar*, which is unsigned char* 31 // libxml take xmlChar*, which is unsigned char*
32 inline const unsigned char* UnsignedChar(const char* input) { 32 inline const unsigned char* UnsignedChar(const char* input) {
33 return reinterpret_cast<const unsigned char*>(input); 33 return reinterpret_cast<const unsigned char*>(input);
34 } 34 }
35 35
36 // Any id less than 16 bytes is considered to be a testing id.
jar (doing other things) 2012/02/23 01:59:18 Why not just use some fixed ID? Probability of co
Ilya Sherman 2012/02/24 02:10:06 No idea -- I was just trying to match the existing
37 bool IsTestingID(const std::string& id) {
38 return id.size() < 16;
39 }
40
41 // Converts the 8-byte prefix of an MD5 hash into a uint64 value.
42 inline uint64 HashToUInt64(const std::string& hash) {
43 uint64 value;
44 DCHECK_GE(hash.size(), sizeof(value));
45 memcpy(&value, hash.data(), sizeof(value));
46 return base::htonll(value);
47 }
48
49 // Creates an MD5 hash of the given value, and returns hash as a byte buffer
50 // encoded as an std::string.
51 std::string CreateHash(const std::string& value) {
52 base::MD5Context context;
53 base::MD5Init(&context);
54 base::MD5Update(&context, value);
55
56 base::MD5Digest digest;
57 base::MD5Final(&digest, &context);
58
59 std::string hash(reinterpret_cast<char*>(digest.a), arraysize(digest.a));
60
61 // The following log is VERY helpful when folks add some named histogram into
62 // the code, but forgot to update the descriptive list of histograms. When
63 // that happens, all we get to see (server side) is a hash of the histogram
64 // name. We can then use this logging to find out what histogram name was
65 // being hashed to a given MD5 value by just running the version of Chromium
66 // in question with --enable-logging.
67 DVLOG(1) << "Metrics: Hash numeric [" << value << "]=[" << HashToUInt64(hash)
jar (doing other things) 2012/02/23 01:59:18 It is good this is being preserved! ;-) Thanks!
68 << "]";
69
70 return hash;
71 }
72
36 } // namespace 73 } // namespace
37 74
38 class MetricsLogBase::XmlWrapper { 75 class MetricsLogBase::XmlWrapper {
39 public: 76 public:
40 XmlWrapper() 77 XmlWrapper()
41 : doc_(NULL), 78 : doc_(NULL),
42 buffer_(NULL), 79 buffer_(NULL),
43 writer_(NULL) { 80 writer_(NULL) {
44 buffer_ = xmlBufferCreate(); 81 buffer_ = xmlBufferCreate();
45 DCHECK(buffer_); 82 DCHECK(buffer_);
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
85 }; 122 };
86 123
87 MetricsLogBase::MetricsLogBase(const std::string& client_id, int session_id, 124 MetricsLogBase::MetricsLogBase(const std::string& client_id, int session_id,
88 const std::string& version_string) 125 const std::string& version_string)
89 : start_time_(Time::Now()), 126 : start_time_(Time::Now()),
90 client_id_(client_id), 127 client_id_(client_id),
91 session_id_(base::IntToString(session_id)), 128 session_id_(base::IntToString(session_id)),
92 locked_(false), 129 locked_(false),
93 xml_wrapper_(new XmlWrapper), 130 xml_wrapper_(new XmlWrapper),
94 num_events_(0) { 131 num_events_(0) {
132 int64_t build_time = GetBuildTime();
95 133
134 // Write the XML version.
96 StartElement("log"); 135 StartElement("log");
97 WriteAttribute("clientid", client_id_); 136 WriteAttribute("clientid", client_id_);
98 WriteInt64Attribute("buildtime", GetBuildTime()); 137 WriteInt64Attribute("buildtime", build_time);
99 WriteAttribute("appversion", version_string); 138 WriteAttribute("appversion", version_string);
139
140 // Write the protobuf version.
141 if (IsTestingID(client_id_)) {
jar (doing other things) 2012/02/23 01:59:18 This seems a little strange. It is nicer to not h
142 uma_proto_.set_client_id(0);
143 } else {
144 uma_proto_.set_client_id(HashToUInt64(CreateHash(client_id)));
145 }
146 uma_proto_.set_session_id(session_id);
147 uma_proto_.mutable_system_profile()->set_build_timestamp(build_time);
148 uma_proto_.mutable_system_profile()->set_app_version(version_string);
100 } 149 }
101 150
102 MetricsLogBase::~MetricsLogBase() { 151 MetricsLogBase::~MetricsLogBase() {
103 if (!locked_) { 152 if (!locked_) {
104 locked_ = true; 153 locked_ = true;
105 int result = xmlTextWriterEndDocument(xml_wrapper_->writer()); 154 int result = xmlTextWriterEndDocument(xml_wrapper_->writer());
106 DCHECK_GE(result, 0); 155 DCHECK_GE(result, 0);
107 } 156 }
108 delete xml_wrapper_; 157 delete xml_wrapper_;
109 xml_wrapper_ = NULL; 158 xml_wrapper_ = NULL;
110 } 159 }
111 160
112 void MetricsLogBase::CloseLog() { 161 void MetricsLogBase::CloseLog() {
113 DCHECK(!locked_); 162 DCHECK(!locked_);
114 locked_ = true; 163 locked_ = true;
115 164
116 int result = xmlTextWriterEndDocument(xml_wrapper_->writer()); 165 int result = xmlTextWriterEndDocument(xml_wrapper_->writer());
117 DCHECK_GE(result, 0); 166 DCHECK_GE(result, 0);
118 167
119 result = xmlTextWriterFlush(xml_wrapper_->writer()); 168 result = xmlTextWriterFlush(xml_wrapper_->writer());
120 DCHECK_GE(result, 0); 169 DCHECK_GE(result, 0);
121 170
122 #if defined(OS_CHROMEOS) 171 #if defined(OS_CHROMEOS)
172 // TODO(isherman): Once the XML pipeline is deprecated, there will be no need
173 // to track the hardware class in a separate member variable and only write it
174 // out when the log is closed.
123 xmlNodePtr root = xmlDocGetRootElement(xml_wrapper_->doc()); 175 xmlNodePtr root = xmlDocGetRootElement(xml_wrapper_->doc());
124 if (!hardware_class_.empty()) { 176 if (!hardware_class_.empty()) {
125 // The hardware class is determined after the first ongoing log is 177 // The hardware class is determined after the first ongoing log is
126 // constructed, so this adds the root element's "hardwareclass" 178 // constructed, so this adds the root element's "hardwareclass"
127 // attribute when the log is closed instead. 179 // attribute when the log is closed instead.
128 xmlNewProp(root, UnsignedChar("hardwareclass"), 180 xmlNewProp(root, UnsignedChar("hardwareclass"),
129 UnsignedChar(hardware_class_.c_str())); 181 UnsignedChar(hardware_class_.c_str()));
182
183 // Write to the protobuf too.
184 uma_proto_.mutable_system_profile()->set_hardware_class(hardware_class_);
130 } 185 }
131 186
187 // TODO(isherman): Why is this done only on ChromeOS?
jar (doing other things) 2012/02/23 01:59:18 I have no idea why :-/. I'd have to read about ho
Ilya Sherman 2012/02/24 02:10:06 Ok. Well, the good news is that we're (hopefully)
132 // Flattens the XML tree into a character buffer. 188 // Flattens the XML tree into a character buffer.
133 PerfTimer dump_timer; 189 PerfTimer dump_timer;
134 result = xmlNodeDump(xml_wrapper_->buffer(), xml_wrapper_->doc(), 190 result = xmlNodeDump(xml_wrapper_->buffer(), xml_wrapper_->doc(),
135 root, /* level */ 0, /* format */ 1); 191 root, /* level */ 0, /* format */ 1);
136 DCHECK_GE(result, 0); 192 DCHECK_GE(result, 0);
137 UMA_HISTOGRAM_TIMES("UMA.XMLNodeDumpTime", dump_timer.Elapsed()); 193 UMA_HISTOGRAM_TIMES("UMA.XMLNodeDumpTime", dump_timer.Elapsed());
138 194
139 PerfTimer free_timer; 195 PerfTimer free_timer;
140 xml_wrapper_->FreeDocWriter(); 196 xml_wrapper_->FreeDocWriter();
141 UMA_HISTOGRAM_TIMES("UMA.XMLWriterDestructionTime", free_timer.Elapsed()); 197 UMA_HISTOGRAM_TIMES("UMA.XMLWriterDestructionTime", free_timer.Elapsed());
142 #endif // OS_CHROMEOS 198 #endif // OS_CHROMEOS
143 } 199 }
144 200
145 int MetricsLogBase::GetEncodedLogSize() { 201 int MetricsLogBase::GetEncodedLogSizeXml() {
146 DCHECK(locked_); 202 DCHECK(locked_);
147 CHECK(xml_wrapper_); 203 CHECK(xml_wrapper_);
148 CHECK(xml_wrapper_->buffer()); 204 CHECK(xml_wrapper_->buffer());
149 return xml_wrapper_->buffer()->use; 205 return xml_wrapper_->buffer()->use;
150 } 206 }
151 207
152 bool MetricsLogBase::GetEncodedLog(char* buffer, int buffer_size) { 208 bool MetricsLogBase::GetEncodedLogXml(char* buffer, int buffer_size) {
153 DCHECK(locked_); 209 DCHECK(locked_);
154 if (buffer_size < GetEncodedLogSize()) 210 if (buffer_size < GetEncodedLogSizeXml())
155 return false; 211 return false;
156 212
157 memcpy(buffer, xml_wrapper_->buffer()->content, GetEncodedLogSize()); 213 memcpy(buffer, xml_wrapper_->buffer()->content, GetEncodedLogSizeXml());
158 return true; 214 return true;
159 } 215 }
160 216
161 std::string MetricsLogBase::GetEncodedLogString() { 217 std::string MetricsLogBase::GetEncodedLogProto() {
162 DCHECK(locked_); 218 DCHECK(locked_);
163 return std::string(reinterpret_cast<char*>(xml_wrapper_->buffer()->content)); 219 return uma_proto_.SerializeAsString();
jar (doing other things) 2012/02/23 01:59:18 It might be nice to pass in a "std::string*" and h
Ilya Sherman 2012/02/24 02:10:06 I wrote it this way based on the recommendation in
164 } 220 }
165 221
166 int MetricsLogBase::GetElapsedSeconds() { 222 int MetricsLogBase::GetElapsedSeconds() {
167 return static_cast<int>((Time::Now() - start_time_).InSeconds()); 223 return static_cast<int>((Time::Now() - start_time_).InSeconds());
168 } 224 }
169 225
170 std::string MetricsLogBase::CreateHash(const std::string& value) { 226 // static
171 base::MD5Context ctx; 227 void MetricsLogBase::CreateHashes(const std::string& string,
172 base::MD5Init(&ctx); 228 std::string* base64_encoded_hash,
173 base::MD5Update(&ctx, value); 229 uint64* numeric_hash) {
230 std::string hash = CreateHash(string);
174 231
175 base::MD5Digest digest; 232 std::string encoded_digest;
176 base::MD5Final(&digest, &ctx); 233 if (base::Base64Encode(hash, &encoded_digest))
234 DVLOG(1) << "Metrics: Hash [" << encoded_digest << "]=[" << string << "]";
177 235
178 uint64 reverse_uint64; 236 *base64_encoded_hash = encoded_digest;
179 // UMA only uses first 8 chars of hash. We use the above uint64 instead 237 *numeric_hash = HashToUInt64(hash);
180 // of a unsigned char[8] so that we don't run into strict aliasing issues
181 // in the LOG statement below when trying to interpret reverse as a uint64.
182 unsigned char* reverse = reinterpret_cast<unsigned char *>(&reverse_uint64);
183 DCHECK(arraysize(digest.a) >= sizeof(reverse_uint64));
184 for (size_t i = 0; i < sizeof(reverse_uint64); ++i)
185 reverse[i] = digest.a[sizeof(reverse_uint64) - i - 1];
186 // The following log is VERY helpful when folks add some named histogram into
187 // the code, but forgot to update the descriptive list of histograms. When
188 // that happens, all we get to see (server side) is a hash of the histogram
189 // name. We can then use this logging to find out what histogram name was
190 // being hashed to a given MD5 value by just running the version of Chromium
191 // in question with --enable-logging.
192 DVLOG(1) << "Metrics: Hash numeric [" << value
193 << "]=[" << reverse_uint64 << "]";
194 return std::string(reinterpret_cast<char*>(digest.a), arraysize(digest.a));
195 }
196
197 std::string MetricsLogBase::CreateBase64Hash(const std::string& string) {
198 std::string encoded_digest;
199 if (base::Base64Encode(CreateHash(string), &encoded_digest)) {
200 DVLOG(1) << "Metrics: Hash [" << encoded_digest << "]=[" << string << "]";
201 return encoded_digest;
202 }
203 return std::string();
204 } 238 }
205 239
206 void MetricsLogBase::RecordUserAction(const char* key) { 240 void MetricsLogBase::RecordUserAction(const char* key) {
207 DCHECK(!locked_); 241 DCHECK(!locked_);
208 242
209 std::string command_hash = CreateBase64Hash(key); 243 std::string base64_hash;
210 if (command_hash.empty()) { 244 uint64 numeric_hash;
245 CreateHashes(key, &base64_hash, &numeric_hash);
246 if (base64_hash.empty()) {
211 NOTREACHED() << "Unable generate encoded hash of command: " << key; 247 NOTREACHED() << "Unable generate encoded hash of command: " << key;
212 return; 248 return;
213 } 249 }
214 250
251 // Write the XML version.
215 OPEN_ELEMENT_FOR_SCOPE("uielement"); 252 OPEN_ELEMENT_FOR_SCOPE("uielement");
216 WriteAttribute("action", "command"); 253 WriteAttribute("action", "command");
217 WriteAttribute("targetidhash", command_hash); 254 WriteAttribute("targetidhash", base64_hash);
255
256 // Write the protobuf version.
257 UserActionEventProto* user_action = uma_proto_.add_user_action_event();
258 user_action->set_name_hash(numeric_hash);
jar (doing other things) 2012/02/23 01:59:18 Have you consistently decided to use a full 64bit
Ilya Sherman 2012/02/24 02:10:06 Yes, that was my understanding.
259 user_action->set_time(Time::Now().ToTimeT());
jar (doing other things) 2012/02/23 01:59:18 Note that Time::Now() is not monotonic, and will v
Ilya Sherman 2012/02/24 02:10:06 Hmm, that's a really good point. I think TimeTick
218 260
219 // TODO(jhughes): Properly track windows. 261 // TODO(jhughes): Properly track windows.
220 WriteIntAttribute("window", 0); 262 WriteIntAttribute("window", 0);
221 WriteCommonEventAttributes(); 263 WriteCommonEventAttributes();
222 264
223 ++num_events_; 265 ++num_events_;
224 } 266 }
225 267
226 void MetricsLogBase::RecordLoadEvent(int window_id, 268 void MetricsLogBase::RecordLoadEvent(int window_id,
227 const GURL& url, 269 const GURL& url,
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
303 return base::Uint64ToString(Time::Now().ToTimeT()); 345 return base::Uint64ToString(Time::Now().ToTimeT());
304 } 346 }
305 347
306 // These are the attributes that are common to every event. 348 // These are the attributes that are common to every event.
307 void MetricsLogBase::WriteCommonEventAttributes() { 349 void MetricsLogBase::WriteCommonEventAttributes() {
308 WriteAttribute("session", session_id_); 350 WriteAttribute("session", session_id_);
309 WriteAttribute("time", GetCurrentTimeString()); 351 WriteAttribute("time", GetCurrentTimeString());
310 } 352 }
311 353
312 void MetricsLogBase::WriteAttribute(const std::string& name, 354 void MetricsLogBase::WriteAttribute(const std::string& name,
313 const std::string& value) { 355 const std::string& value) {
314 DCHECK(!locked_); 356 DCHECK(!locked_);
315 DCHECK(!name.empty()); 357 DCHECK(!name.empty());
316 358
317 int result = xmlTextWriterWriteAttribute(xml_wrapper_->writer(), 359 int result = xmlTextWriterWriteAttribute(xml_wrapper_->writer(),
318 UnsignedChar(name.c_str()), 360 UnsignedChar(name.c_str()),
319 UnsignedChar(value.c_str())); 361 UnsignedChar(value.c_str()));
320 DCHECK_GE(result, 0); 362 DCHECK_GE(result, 0);
321 } 363 }
322 364
323 void MetricsLogBase::WriteIntAttribute(const std::string& name, int value) { 365 void MetricsLogBase::WriteIntAttribute(const std::string& name, int value) {
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
380 const Histogram& histogram, 422 const Histogram& histogram,
381 const Histogram::SampleSet& snapshot) { 423 const Histogram::SampleSet& snapshot) {
382 DCHECK(!locked_); 424 DCHECK(!locked_);
383 DCHECK_NE(0, snapshot.TotalCount()); 425 DCHECK_NE(0, snapshot.TotalCount());
384 snapshot.CheckSize(histogram); 426 snapshot.CheckSize(histogram);
385 427
386 // We will ignore the MAX_INT/infinite value in the last element of range[]. 428 // We will ignore the MAX_INT/infinite value in the last element of range[].
387 429
388 OPEN_ELEMENT_FOR_SCOPE("histogram"); 430 OPEN_ELEMENT_FOR_SCOPE("histogram");
389 431
390 WriteAttribute("name", CreateBase64Hash(histogram.histogram_name())); 432 std::string base64_name_hash;
433 uint64 numeric_name_hash;
434 CreateHashes(histogram.histogram_name(),
435 &base64_name_hash,
436 &numeric_name_hash);
437
438 // Write the XML version.
439 WriteAttribute("name", base64_name_hash);
391 440
392 WriteInt64Attribute("sum", snapshot.sum()); 441 WriteInt64Attribute("sum", snapshot.sum());
393 // TODO(jar): Remove sumsquares when protobuffer accepts this as optional. 442 // TODO(jar): Remove sumsquares when protobuffer accepts this as optional.
394 WriteInt64Attribute("sumsquares", 0); 443 WriteInt64Attribute("sumsquares", 0);
395 444
396 for (size_t i = 0; i < histogram.bucket_count(); i++) { 445 for (size_t i = 0; i < histogram.bucket_count(); i++) {
397 if (snapshot.counts(i)) { 446 if (snapshot.counts(i)) {
398 OPEN_ELEMENT_FOR_SCOPE("histogrambucket"); 447 OPEN_ELEMENT_FOR_SCOPE("histogrambucket");
399 WriteIntAttribute("min", histogram.ranges(i)); 448 WriteIntAttribute("min", histogram.ranges(i));
400 WriteIntAttribute("max", histogram.ranges(i + 1)); 449 WriteIntAttribute("max", histogram.ranges(i + 1));
401 WriteIntAttribute("count", snapshot.counts(i)); 450 WriteIntAttribute("count", snapshot.counts(i));
402 } 451 }
403 } 452 }
453
454 // Write the protobuf version.
455 HistogramEventProto* histogram_proto = uma_proto_.add_histogram_event();
456 histogram_proto->set_name_hash(numeric_name_hash);
457 histogram_proto->set_sum(snapshot.sum());
458
459 for (size_t i = 0; i < histogram.bucket_count(); ++i) {
460 if (snapshot.counts(i)) {
461 HistogramEventProto::Bucket* bucket = histogram_proto->add_bucket();
462 bucket->set_min(histogram.ranges(i));
463 bucket->set_max(histogram.ranges(i + 1));
464 bucket->set_bucket_index(i);
465 bucket->set_count(snapshot.counts(i));
466 }
467 }
404 } 468 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698