Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(598)

Side by Side Diff: tools/battor_agent/battor_agent.cc

Issue 2826913002: [BattOr] Retry battor_agent commands when they fail. (Closed)
Patch Set: Doubled retry attempts (each StopTracing attempt requires two retries) Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 #include "tools/battor_agent/battor_agent.h" 4 #include "tools/battor_agent/battor_agent.h"
5 5
6 #include <iomanip> 6 #include <iomanip>
7 7
8 #include "base/bind.h" 8 #include "base/bind.h"
9 #include "base/threading/thread_task_runner_handle.h" 9 #include "base/threading/thread_task_runner_handle.h"
10 #include "tools/battor_agent/battor_connection_impl.h" 10 #include "tools/battor_agent/battor_connection_impl.h"
11 #include "tools/battor_agent/battor_sample_converter.h" 11 #include "tools/battor_agent/battor_sample_converter.h"
12 12
13 using std::vector; 13 using std::vector;
14 14
15 namespace battor { 15 namespace battor {
16 16
17 namespace { 17 namespace {
18 18
19 // The maximum number of times to retry when initializing a BattOr. 19 // The maximum number of times to retry a command.
20 const uint8_t kMaxInitAttempts = 20; 20 const uint8_t kMaxCommandAttempts = 10;
rnephew (Reviews Here) 2017/05/09 15:04:05 Was 20 chosen for a reason for kMaxInitAttempts/kM
charliea (OOO until 10-5) 2017/05/09 19:58:08 I haven't seen this code in a long, long time...
aschulman 2017/05/11 13:13:00 20 was due to the super short retry interval. With
21
22 // The maximum number of times to retry the StartTracing command.
23 const uint8_t kMaxStartTracingAttempts = 5;
24
25 // The number of milliseconds to wait before retrying initialization.
26 const uint16_t kInitRetryDelayMilliseconds = 100;
27
28 // The maximum number of times to retry when reading a message.
29 const uint8_t kMaxReadAttempts = 20;
30
31 // The number of milliseconds to wait before trying to read a message again.
32 const uint8_t kReadRetryDelayMilliseconds = 1;
33 21
34 // The amount of time we need to wait after recording a clock sync marker in 22 // The amount of time we need to wait after recording a clock sync marker in
35 // order to ensure that the sample we synced to doesn't get thrown out. 23 // order to ensure that the sample we synced to doesn't get thrown out.
36 const uint8_t kStopTracingClockSyncDelayMilliseconds = 100; 24 const uint8_t kStopTracingClockSyncDelayMilliseconds = 100;
37 25
38 // The number of seconds allowed for a given action before timing out. 26 // The number of seconds to wait before retrying a command.
39 const uint8_t kBattOrTimeoutSeconds = 4; 27 const uint16_t kCommandRetryDelaySeconds = 2;
rnephew (Reviews Here) 2017/04/21 22:10:45 These delays seem to be much larger than the previ
aschulman 2017/05/11 13:12:59 Yeah the 100 msec retry interval was too aggressiv
28
29 // The number of seconds allowed for a control message before timing out.
30 const uint8_t kBattOrControlMessageTimeoutSeconds = 2;
31
32 // The number of seconds allowed for connection to open before timing out.
33 const uint8_t kBattOrConnectionTimeoutSeconds = 4;
40 34
41 // Returns true if the specified vector of bytes decodes to a message that is an 35 // Returns true if the specified vector of bytes decodes to a message that is an
42 // ack for the specified control message type. 36 // ack for the specified control message type.
43 bool IsAckOfControlCommand(BattOrMessageType message_type, 37 bool IsAckOfControlCommand(BattOrMessageType message_type,
44 BattOrControlMessageType control_message_type, 38 BattOrControlMessageType control_message_type,
45 const vector<char>& msg) { 39 const vector<char>& msg) {
46 if (message_type != BATTOR_MESSAGE_TYPE_CONTROL_ACK) 40 if (message_type != BATTOR_MESSAGE_TYPE_CONTROL_ACK)
47 return false; 41 return false;
48 42
49 if (msg.size() != sizeof(BattOrControlMessageAck)) 43 if (msg.size() != sizeof(BattOrControlMessageAck))
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
116 Listener* listener, 110 Listener* listener,
117 scoped_refptr<base::SingleThreadTaskRunner> file_thread_task_runner, 111 scoped_refptr<base::SingleThreadTaskRunner> file_thread_task_runner,
118 scoped_refptr<base::SingleThreadTaskRunner> ui_thread_task_runner) 112 scoped_refptr<base::SingleThreadTaskRunner> ui_thread_task_runner)
119 : connection_(new BattOrConnectionImpl(path, 113 : connection_(new BattOrConnectionImpl(path,
120 this, 114 this,
121 file_thread_task_runner, 115 file_thread_task_runner,
122 ui_thread_task_runner)), 116 ui_thread_task_runner)),
123 listener_(listener), 117 listener_(listener),
124 last_action_(Action::INVALID), 118 last_action_(Action::INVALID),
125 command_(Command::INVALID), 119 command_(Command::INVALID),
126 num_init_attempts_(0), 120 num_command_attempts_(0) {
127 num_start_tracing_attempts_(0),
128 num_read_attempts_(0) {
129 // We don't care what thread the constructor is called on - we only care that 121 // We don't care what thread the constructor is called on - we only care that
130 // all of the other method invocations happen on the same thread. 122 // all of the other method invocations happen on the same thread.
131 thread_checker_.DetachFromThread(); 123 thread_checker_.DetachFromThread();
132 } 124 }
133 125
134 BattOrAgent::~BattOrAgent() { 126 BattOrAgent::~BattOrAgent() {
135 DCHECK(thread_checker_.CalledOnValidThread()); 127 DCHECK(thread_checker_.CalledOnValidThread());
136 } 128 }
137 129
138 void BattOrAgent::StartTracing() { 130 void BattOrAgent::StartTracing() {
139 DCHECK(thread_checker_.CalledOnValidThread()); 131 DCHECK(thread_checker_.CalledOnValidThread());
140 132
141 // When tracing is restarted, all previous clock sync markers are invalid. 133 // When tracing is restarted, all previous clock sync markers are invalid.
142 clock_sync_markers_.clear(); 134 clock_sync_markers_.clear();
143 last_clock_sync_time_ = base::TimeTicks(); 135 last_clock_sync_time_ = base::TimeTicks();
144 136
145 num_start_tracing_attempts_ = 1;
146 command_ = Command::START_TRACING; 137 command_ = Command::START_TRACING;
147 PerformAction(Action::REQUEST_CONNECTION); 138 PerformAction(Action::REQUEST_CONNECTION);
148 } 139 }
149 140
150 void BattOrAgent::StopTracing() { 141 void BattOrAgent::StopTracing() {
151 DCHECK(thread_checker_.CalledOnValidThread()); 142 DCHECK(thread_checker_.CalledOnValidThread());
152 143
153 command_ = Command::STOP_TRACING; 144 command_ = Command::STOP_TRACING;
154 PerformAction(Action::REQUEST_CONNECTION); 145 PerformAction(Action::REQUEST_CONNECTION);
155 } 146 }
156 147
157 void BattOrAgent::RecordClockSyncMarker(const std::string& marker) { 148 void BattOrAgent::RecordClockSyncMarker(const std::string& marker) {
158 DCHECK(thread_checker_.CalledOnValidThread()); 149 DCHECK(thread_checker_.CalledOnValidThread());
159 150
160 command_ = Command::RECORD_CLOCK_SYNC_MARKER; 151 command_ = Command::RECORD_CLOCK_SYNC_MARKER;
161 pending_clock_sync_marker_ = marker; 152 pending_clock_sync_marker_ = marker;
162 PerformAction(Action::REQUEST_CONNECTION); 153 PerformAction(Action::REQUEST_CONNECTION);
163 } 154 }
164 155
165 void BattOrAgent::GetFirmwareGitHash() { 156 void BattOrAgent::GetFirmwareGitHash() {
166 DCHECK(thread_checker_.CalledOnValidThread()); 157 DCHECK(thread_checker_.CalledOnValidThread());
167 158
168 command_ = Command::GET_FIRMWARE_GIT_HASH; 159 command_ = Command::GET_FIRMWARE_GIT_HASH;
169 PerformAction(Action::REQUEST_CONNECTION); 160 PerformAction(Action::REQUEST_CONNECTION);
170 } 161 }
171 162
172 void BattOrAgent::BeginConnect() { 163 void BattOrAgent::BeginConnect() {
173 DCHECK(thread_checker_.CalledOnValidThread()); 164 DCHECK(thread_checker_.CalledOnValidThread());
174 165
166 // Start the timeout timer for opening the connection.
167 timeout_callback_.Reset(
168 base::Bind(&BattOrAgent::OnActionTimeout, AsWeakPtr()));
169 base::ThreadTaskRunnerHandle::Get()->PostDelayedTask(
170 FROM_HERE, timeout_callback_.callback(),
171 base::TimeDelta::FromSeconds(kBattOrConnectionTimeoutSeconds));
172
175 connection_->Open(); 173 connection_->Open();
176 } 174 }
177 175
178 void BattOrAgent::OnConnectionOpened(bool success) { 176 void BattOrAgent::OnConnectionOpened(bool success) {
179 // Return immediately if opening the connection already timed out. 177 // Cancel timeout because the connection was opened in time.
180 if (timeout_callback_.IsCancelled())
181 return;
182 timeout_callback_.Cancel(); 178 timeout_callback_.Cancel();
183 179
184 if (!success) { 180 if (!success) {
185 CompleteCommand(BATTOR_ERROR_CONNECTION_FAILED); 181 CompleteCommand(BATTOR_ERROR_CONNECTION_FAILED);
186 return; 182 return;
187 } 183 }
188 184
189 switch (command_) { 185 switch (command_) {
190 case Command::START_TRACING: 186 case Command::START_TRACING:
191 num_init_attempts_ = 1;
192 PerformAction(Action::SEND_INIT); 187 PerformAction(Action::SEND_INIT);
193 return; 188 return;
194 case Command::STOP_TRACING: 189 case Command::STOP_TRACING:
195 PerformAction(Action::SEND_EEPROM_REQUEST); 190 PerformAction(Action::SEND_EEPROM_REQUEST);
196 return; 191 return;
197 case Command::RECORD_CLOCK_SYNC_MARKER: 192 case Command::RECORD_CLOCK_SYNC_MARKER:
198 PerformAction(Action::SEND_CURRENT_SAMPLE_REQUEST); 193 PerformAction(Action::SEND_CURRENT_SAMPLE_REQUEST);
199 return; 194 return;
200 case Command::GET_FIRMWARE_GIT_HASH: 195 case Command::GET_FIRMWARE_GIT_HASH:
201 num_init_attempts_ = 1;
202 PerformAction(Action::SEND_INIT); 196 PerformAction(Action::SEND_INIT);
203 return; 197 return;
204 case Command::INVALID: 198 case Command::INVALID:
205 NOTREACHED(); 199 NOTREACHED();
200 return;
206 } 201 }
207 } 202 }
208 203
209 void BattOrAgent::OnBytesSent(bool success) { 204 void BattOrAgent::OnBytesSent(bool success) {
210 DCHECK(thread_checker_.CalledOnValidThread()); 205 DCHECK(thread_checker_.CalledOnValidThread());
211 206
212 // Return immediately if whatever action we were trying to perform already
213 // timed out.
214 if (timeout_callback_.IsCancelled())
215 return;
216 timeout_callback_.Cancel();
217
218 if (!success) { 207 if (!success) {
219 CompleteCommand(BATTOR_ERROR_SEND_ERROR); 208 CompleteCommand(BATTOR_ERROR_SEND_ERROR);
220 return; 209 return;
221 } 210 }
222 211
223 switch (last_action_) { 212 switch (last_action_) {
224 case Action::SEND_INIT: 213 case Action::SEND_INIT:
225 PerformAction(Action::READ_INIT_ACK); 214 PerformAction(Action::READ_INIT_ACK);
226 return; 215 return;
227 case Action::SEND_SET_GAIN: 216 case Action::SEND_SET_GAIN:
228 PerformAction(Action::READ_SET_GAIN_ACK); 217 PerformAction(Action::READ_SET_GAIN_ACK);
229 return; 218 return;
230 case Action::SEND_START_TRACING: 219 case Action::SEND_START_TRACING:
231 PerformAction(Action::READ_START_TRACING_ACK); 220 PerformAction(Action::READ_START_TRACING_ACK);
232 return; 221 return;
233 case Action::SEND_EEPROM_REQUEST: 222 case Action::SEND_EEPROM_REQUEST:
234 num_read_attempts_ = 1;
235 PerformAction(Action::READ_EEPROM); 223 PerformAction(Action::READ_EEPROM);
236 return; 224 return;
237 case Action::SEND_SAMPLES_REQUEST: 225 case Action::SEND_SAMPLES_REQUEST:
238 num_read_attempts_ = 1;
239 PerformAction(Action::READ_CALIBRATION_FRAME); 226 PerformAction(Action::READ_CALIBRATION_FRAME);
240 return; 227 return;
241 case Action::SEND_CURRENT_SAMPLE_REQUEST: 228 case Action::SEND_CURRENT_SAMPLE_REQUEST:
242 num_read_attempts_ = 1;
243 PerformAction(Action::READ_CURRENT_SAMPLE); 229 PerformAction(Action::READ_CURRENT_SAMPLE);
244 return; 230 return;
245 case Action::SEND_GIT_HASH_REQUEST: 231 case Action::SEND_GIT_HASH_REQUEST:
246 num_read_attempts_ = 1;
247 PerformAction(Action::READ_GIT_HASH); 232 PerformAction(Action::READ_GIT_HASH);
248 return; 233 return;
249 default: 234 default:
250 CompleteCommand(BATTOR_ERROR_UNEXPECTED_MESSAGE); 235 NOTREACHED();
236 return;
251 } 237 }
252 } 238 }
253 239
254 void BattOrAgent::OnMessageRead(bool success, 240 void BattOrAgent::OnMessageRead(bool success,
255 BattOrMessageType type, 241 BattOrMessageType type,
256 std::unique_ptr<vector<char>> bytes) { 242 std::unique_ptr<vector<char>> bytes) {
257 // Return immediately if whatever action we were trying to perform already
258 // timed out.
259 if (timeout_callback_.IsCancelled())
260 return;
261 timeout_callback_.Cancel();
262
263 if (!success) { 243 if (!success) {
264 switch (last_action_) { 244 switch (last_action_) {
265 case Action::READ_GIT_HASH: 245 case Action::READ_GIT_HASH:
246 case Action::READ_INIT_ACK:
247 case Action::READ_SET_GAIN_ACK:
248 case Action::READ_START_TRACING_ACK:
266 case Action::READ_EEPROM: 249 case Action::READ_EEPROM:
267 case Action::READ_CALIBRATION_FRAME: 250 case Action::READ_CALIBRATION_FRAME:
268 case Action::READ_DATA_FRAME: 251 case Action::READ_DATA_FRAME:
269 case Action::READ_CURRENT_SAMPLE: 252 RetryCommand();
270 if (num_read_attempts_++ > kMaxReadAttempts) {
271 CompleteCommand(BATTOR_ERROR_RECEIVE_ERROR);
272 return;
273 }
274
275 PerformDelayedAction(last_action_, base::TimeDelta::FromMilliseconds(
276 kReadRetryDelayMilliseconds));
277 return; 253 return;
278 254
279 // Retry sending an INIT if it an ACK is not received. 255 case Action::READ_CURRENT_SAMPLE:
280 case Action::SEND_INIT: 256 CompleteCommand(BATTOR_ERROR_RECEIVE_ERROR);
281 case Action::READ_INIT_ACK:
282 if (num_init_attempts_++ < kMaxInitAttempts) {
283 PerformDelayedAction(Action::SEND_INIT,
284 base::TimeDelta::FromMilliseconds(kInitRetryDelayMilliseconds));
285 } else {
286 CompleteCommand(BATTOR_ERROR_TOO_MANY_INIT_RETRIES);
287 }
288
289 return;
290
291 case Action::READ_START_TRACING_ACK:
292 if (num_start_tracing_attempts_++ < kMaxStartTracingAttempts) {
293 num_init_attempts_ = 1;
294 PerformAction(Action::SEND_INIT);
295 } else {
296 CompleteCommand(BATTOR_ERROR_TOO_MANY_START_TRACING_RETRIES);
297 }
298
299 return; 257 return;
300 258
301 default: 259 default:
302 CompleteCommand(BATTOR_ERROR_RECEIVE_ERROR); 260 NOTREACHED();
303 return; 261 return;
304 } 262 }
305 } 263 }
306 264
265 // Successfully read a message, cancel any timeouts.
266 timeout_callback_.Cancel();
267
307 switch (last_action_) { 268 switch (last_action_) {
308 case Action::READ_INIT_ACK: 269 case Action::READ_INIT_ACK:
309 if (!IsAckOfControlCommand(type, BATTOR_CONTROL_MESSAGE_TYPE_INIT, 270 if (!IsAckOfControlCommand(type, BATTOR_CONTROL_MESSAGE_TYPE_INIT,
310 *bytes)) { 271 *bytes)) {
311 if (num_init_attempts_++ < kMaxInitAttempts) { 272 RetryCommand();
312 PerformDelayedAction(Action::SEND_INIT,
313 base::TimeDelta::FromMilliseconds(kInitRetryDelayMilliseconds));
314 } else {
315 CompleteCommand(BATTOR_ERROR_TOO_MANY_INIT_RETRIES);
316 }
317
318 return; 273 return;
319 } 274 }
320 275
321 switch (command_) { 276 switch (command_) {
322 case Command::START_TRACING: 277 case Command::START_TRACING:
323 PerformAction(Action::SEND_SET_GAIN); 278 PerformAction(Action::SEND_SET_GAIN);
324 return; 279 return;
325 case Command::GET_FIRMWARE_GIT_HASH: 280 case Command::GET_FIRMWARE_GIT_HASH:
326 PerformAction(Action::SEND_GIT_HASH_REQUEST); 281 PerformAction(Action::SEND_GIT_HASH_REQUEST);
327 return; 282 return;
328 default: 283 default:
329 CompleteCommand(BATTOR_ERROR_UNEXPECTED_MESSAGE); 284 NOTREACHED();
330 return; 285 return;
331 } 286 }
332 287
333 case Action::READ_SET_GAIN_ACK: 288 case Action::READ_SET_GAIN_ACK:
334 if (!IsAckOfControlCommand(type, BATTOR_CONTROL_MESSAGE_TYPE_SET_GAIN, 289 if (!IsAckOfControlCommand(type, BATTOR_CONTROL_MESSAGE_TYPE_SET_GAIN,
335 *bytes)) { 290 *bytes)) {
336 CompleteCommand(BATTOR_ERROR_UNEXPECTED_MESSAGE); 291 RetryCommand();
337 return; 292 return;
338 } 293 }
339 294
340 PerformAction(Action::SEND_START_TRACING); 295 PerformAction(Action::SEND_START_TRACING);
341 return; 296 return;
342 297
343 case Action::READ_START_TRACING_ACK: 298 case Action::READ_START_TRACING_ACK:
344 if (!IsAckOfControlCommand( 299 if (!IsAckOfControlCommand(
345 type, BATTOR_CONTROL_MESSAGE_TYPE_START_SAMPLING_SD, *bytes)) { 300 type, BATTOR_CONTROL_MESSAGE_TYPE_START_SAMPLING_SD, *bytes)) {
346 if (num_start_tracing_attempts_++ < kMaxStartTracingAttempts) { 301 RetryCommand();
347 num_init_attempts_ = 1;
348 PerformAction(Action::SEND_INIT);
349 } else {
350 CompleteCommand(BATTOR_ERROR_TOO_MANY_START_TRACING_RETRIES);
351 }
352
353 return; 302 return;
354 } 303 }
355 304
356 CompleteCommand(BATTOR_ERROR_NONE); 305 CompleteCommand(BATTOR_ERROR_NONE);
357 return; 306 return;
358 307
359 case Action::READ_EEPROM: { 308 case Action::READ_EEPROM: {
360 battor_eeprom_ = ParseEEPROM(type, *bytes); 309 battor_eeprom_ = ParseEEPROM(type, *bytes);
361 if (!battor_eeprom_) { 310 if (!battor_eeprom_) {
362 CompleteCommand(BATTOR_ERROR_UNEXPECTED_MESSAGE); 311 RetryCommand();
363 return; 312 return;
364 } 313 }
365 314
366 // Make sure that we don't request samples until a safe amount of time has 315 // Make sure that we don't request samples until a safe amount of time has
367 // elapsed since recording the last clock sync marker: we need to ensure 316 // elapsed since recording the last clock sync marker: we need to ensure
368 // that the sample we synced to doesn't get thrown out. 317 // that the sample we synced to doesn't get thrown out.
369 base::TimeTicks min_request_samples_time = 318 base::TimeTicks min_request_samples_time =
370 last_clock_sync_time_ + base::TimeDelta::FromMilliseconds( 319 last_clock_sync_time_ + base::TimeDelta::FromMilliseconds(
371 kStopTracingClockSyncDelayMilliseconds); 320 kStopTracingClockSyncDelayMilliseconds);
372 base::TimeDelta request_samples_delay = std::max( 321 base::TimeDelta request_samples_delay = std::max(
373 min_request_samples_time - base::TimeTicks::Now(), base::TimeDelta()); 322 min_request_samples_time - base::TimeTicks::Now(), base::TimeDelta());
374 323
375 PerformDelayedAction(Action::SEND_SAMPLES_REQUEST, request_samples_delay); 324 PerformDelayedAction(Action::SEND_SAMPLES_REQUEST, request_samples_delay);
376 return; 325 return;
377 } 326 }
378 case Action::READ_CALIBRATION_FRAME: { 327 case Action::READ_CALIBRATION_FRAME: {
379 BattOrFrameHeader frame_header; 328 BattOrFrameHeader frame_header;
380 if (!ParseSampleFrame(type, *bytes, next_sequence_number_++, 329 if (!ParseSampleFrame(type, *bytes, next_sequence_number_++,
381 &frame_header, &calibration_frame_)) { 330 &frame_header, &calibration_frame_)) {
382 CompleteCommand(BATTOR_ERROR_UNEXPECTED_MESSAGE); 331 RetryCommand();
383 return; 332 return;
384 } 333 }
385 334
386 // Make sure that the calibration frame has actual samples in it. 335 // Make sure that the calibration frame has actual samples in it.
387 if (calibration_frame_.empty()) { 336 if (calibration_frame_.empty()) {
388 CompleteCommand(BATTOR_ERROR_UNEXPECTED_MESSAGE); 337 RetryCommand();
389 return; 338 return;
390 } 339 }
391 340
392 num_read_attempts_ = 1;
393 PerformAction(Action::READ_DATA_FRAME); 341 PerformAction(Action::READ_DATA_FRAME);
394 return; 342 return;
395 } 343 }
396 344
397 case Action::READ_DATA_FRAME: { 345 case Action::READ_DATA_FRAME: {
398 BattOrFrameHeader frame_header; 346 BattOrFrameHeader frame_header;
399 vector<RawBattOrSample> frame; 347 vector<RawBattOrSample> frame;
400 if (!ParseSampleFrame(type, *bytes, next_sequence_number_++, 348 if (!ParseSampleFrame(type, *bytes, next_sequence_number_++,
401 &frame_header, &frame)) { 349 &frame_header, &frame)) {
402 CompleteCommand(BATTOR_ERROR_UNEXPECTED_MESSAGE); 350 RetryCommand();
403 return; 351 return;
404 } 352 }
405 353
406 // Check for the empty frame the BattOr uses to indicate it's done 354 // Check for the empty frame the BattOr uses to indicate it's done
407 // streaming samples. 355 // streaming samples.
408 if (frame.empty()) { 356 if (frame.empty()) {
357 // Cancel the next data frame timeout.
358 timeout_callback_.Cancel();
409 CompleteCommand(BATTOR_ERROR_NONE); 359 CompleteCommand(BATTOR_ERROR_NONE);
410 return; 360 return;
411 } 361 }
412 362
413 samples_.insert(samples_.end(), frame.begin(), frame.end()); 363 samples_.insert(samples_.end(), frame.begin(), frame.end());
414 364
415 num_read_attempts_ = 1;
416 PerformAction(Action::READ_DATA_FRAME); 365 PerformAction(Action::READ_DATA_FRAME);
417 return; 366 return;
418 } 367 }
419 368
420 case Action::READ_CURRENT_SAMPLE: 369 case Action::READ_CURRENT_SAMPLE:
421 if (type != BATTOR_MESSAGE_TYPE_CONTROL_ACK || 370 if (type != BATTOR_MESSAGE_TYPE_CONTROL_ACK ||
422 bytes->size() != sizeof(uint32_t)) { 371 bytes->size() != sizeof(uint32_t)) {
423 CompleteCommand(BATTOR_ERROR_UNEXPECTED_MESSAGE); 372 CompleteCommand(BATTOR_ERROR_UNEXPECTED_MESSAGE);
424 return; 373 return;
425 } 374 }
426 375
427 uint32_t sample_num; 376 uint32_t sample_num;
428 memcpy(&sample_num, bytes->data(), sizeof(uint32_t)); 377 memcpy(&sample_num, bytes->data(), sizeof(uint32_t));
429 clock_sync_markers_[sample_num] = pending_clock_sync_marker_; 378 clock_sync_markers_[sample_num] = pending_clock_sync_marker_;
430 last_clock_sync_time_ = base::TimeTicks::Now(); 379 last_clock_sync_time_ = base::TimeTicks::Now();
431 CompleteCommand(BATTOR_ERROR_NONE); 380 CompleteCommand(BATTOR_ERROR_NONE);
432 return; 381 return;
433 382
434 case Action::READ_GIT_HASH: 383 case Action::READ_GIT_HASH:
435 if (type != BATTOR_MESSAGE_TYPE_CONTROL_ACK ){ 384 if (type != BATTOR_MESSAGE_TYPE_CONTROL_ACK) {
436 CompleteCommand(BATTOR_ERROR_UNEXPECTED_MESSAGE); 385 RetryCommand();
437 return; 386 return;
438 } 387 }
388
439 firmware_git_hash_ = std::string(bytes->begin(), bytes->end()); 389 firmware_git_hash_ = std::string(bytes->begin(), bytes->end());
440 CompleteCommand(BATTOR_ERROR_NONE); 390 CompleteCommand(BATTOR_ERROR_NONE);
441 return; 391 return;
442 392
443 default: 393 default:
444 CompleteCommand(BATTOR_ERROR_UNEXPECTED_MESSAGE); 394 NOTREACHED();
395 return;
445 } 396 }
446 } 397 }
447 398
448 void BattOrAgent::PerformAction(Action action) { 399 void BattOrAgent::PerformAction(Action action) {
449 DCHECK(thread_checker_.CalledOnValidThread()); 400 DCHECK(thread_checker_.CalledOnValidThread());
450 401
451 timeout_callback_.Reset(
452 base::Bind(&BattOrAgent::OnActionTimeout, AsWeakPtr()));
453 base::ThreadTaskRunnerHandle::Get()->PostDelayedTask(
454 FROM_HERE, timeout_callback_.callback(),
455 base::TimeDelta::FromSeconds(kBattOrTimeoutSeconds));
456
457 last_action_ = action; 402 last_action_ = action;
458 403
459 switch (action) { 404 switch (action) {
460 case Action::REQUEST_CONNECTION: 405 case Action::REQUEST_CONNECTION:
461 BeginConnect(); 406 BeginConnect();
462 return; 407 return;
463
464 // The following actions are required for StartTracing: 408 // The following actions are required for StartTracing:
465 case Action::SEND_INIT: 409 case Action::SEND_INIT:
466 // Clear out the serial data that may exist from prior init attempts.
467 connection_->Flush();
468
469 SendControlMessage(BATTOR_CONTROL_MESSAGE_TYPE_INIT, 0, 0); 410 SendControlMessage(BATTOR_CONTROL_MESSAGE_TYPE_INIT, 0, 0);
470 return; 411 return;
471 case Action::READ_INIT_ACK: 412 case Action::READ_INIT_ACK:
472 connection_->ReadMessage(BATTOR_MESSAGE_TYPE_CONTROL_ACK); 413 connection_->ReadMessage(BATTOR_MESSAGE_TYPE_CONTROL_ACK);
473 return; 414 return;
474 case Action::SEND_SET_GAIN: 415 case Action::SEND_SET_GAIN:
475 // Set the BattOr's gain. Setting the gain tells the BattOr the range of 416 // Set the BattOr's gain. Setting the gain tells the BattOr the range of
476 // power measurements that we expect to see. 417 // power measurements that we expect to see.
477 SendControlMessage(BATTOR_CONTROL_MESSAGE_TYPE_SET_GAIN, BATTOR_GAIN_LOW, 418 SendControlMessage(BATTOR_CONTROL_MESSAGE_TYPE_SET_GAIN, BATTOR_GAIN_LOW,
478 0); 419 0);
479 return; 420 return;
480 case Action::READ_SET_GAIN_ACK: 421 case Action::READ_SET_GAIN_ACK:
481 connection_->ReadMessage(BATTOR_MESSAGE_TYPE_CONTROL_ACK); 422 connection_->ReadMessage(BATTOR_MESSAGE_TYPE_CONTROL_ACK);
482 return; 423 return;
483 case Action::SEND_START_TRACING: 424 case Action::SEND_START_TRACING:
484 SendControlMessage(BATTOR_CONTROL_MESSAGE_TYPE_START_SAMPLING_SD, 0, 0); 425 SendControlMessage(BATTOR_CONTROL_MESSAGE_TYPE_START_SAMPLING_SD, 0, 0);
485 return; 426 return;
486 case Action::READ_START_TRACING_ACK: 427 case Action::READ_START_TRACING_ACK:
487 connection_->ReadMessage(BATTOR_MESSAGE_TYPE_CONTROL_ACK); 428 connection_->ReadMessage(BATTOR_MESSAGE_TYPE_CONTROL_ACK);
488 return; 429 return;
489
490 // The following actions are required for StopTracing: 430 // The following actions are required for StopTracing:
491 case Action::SEND_EEPROM_REQUEST: 431 case Action::SEND_EEPROM_REQUEST:
492 // Read the BattOr's EEPROM to get calibration information that's required 432 // Read the BattOr's EEPROM to get calibration information that's required
493 // to convert the raw samples to accurate ones. 433 // to convert the raw samples to accurate ones.
494 SendControlMessage(BATTOR_CONTROL_MESSAGE_TYPE_READ_EEPROM, 434 SendControlMessage(BATTOR_CONTROL_MESSAGE_TYPE_READ_EEPROM,
495 sizeof(BattOrEEPROM), 0); 435 sizeof(BattOrEEPROM), 0);
496 return; 436 return;
497 case Action::READ_EEPROM: 437 case Action::READ_EEPROM:
498 connection_->ReadMessage(BATTOR_MESSAGE_TYPE_CONTROL_ACK); 438 connection_->ReadMessage(BATTOR_MESSAGE_TYPE_CONTROL_ACK);
499 return; 439 return;
500 case Action::SEND_SAMPLES_REQUEST: 440 case Action::SEND_SAMPLES_REQUEST:
501 // Send a request to the BattOr to tell it to start streaming the samples 441 // Send a request to the BattOr to tell it to start streaming the samples
502 // that it's stored on its SD card over the serial connection. 442 // that it's stored on its SD card over the serial connection.
503 SendControlMessage(BATTOR_CONTROL_MESSAGE_TYPE_READ_SD_UART, 0, 0); 443 SendControlMessage(BATTOR_CONTROL_MESSAGE_TYPE_READ_SD_UART, 0, 0);
504 return; 444 return;
505 case Action::READ_CALIBRATION_FRAME: 445 case Action::READ_CALIBRATION_FRAME:
506 // Data frames are numbered starting at zero and counting up by one each 446 // Data frames are numbered starting at zero and counting up by one each
507 // data frame. We keep track of the next frame sequence number we expect 447 // data frame. We keep track of the next frame sequence number we expect
508 // to see to ensure we don't miss any data. 448 // to see to ensure we don't miss any data.
509 next_sequence_number_ = 0; 449 next_sequence_number_ = 0;
450
451 // Clear stored samples from prior attempts to read sample frames.
452 samples_.clear();
453 calibration_frame_.clear();
510 case Action::READ_DATA_FRAME: 454 case Action::READ_DATA_FRAME:
511 // The first frame sent back from the BattOr contains voltage and current 455 // The first frame sent back from the BattOr contains voltage and current
512 // data that excludes whatever device is being measured from the 456 // data that excludes whatever device is being measured from the
513 // circuit. We use this first frame to establish a baseline voltage and 457 // circuit. We use this first frame to establish a baseline voltage and
514 // current. 458 // current.
515 // 459 //
516 // All further frames contain real (but uncalibrated) voltage and current 460 // All further frames contain real (but uncalibrated) voltage and current
517 // data. 461 // data.
462
463 // Start the next data frame timeout timer.
464 timeout_callback_.Reset(
465 base::Bind(&BattOrAgent::OnActionTimeout, AsWeakPtr()));
466 base::ThreadTaskRunnerHandle::Get()->PostDelayedTask(
467 FROM_HERE, timeout_callback_.callback(),
468 base::TimeDelta::FromSeconds(kBattOrControlMessageTimeoutSeconds));
charliea (OOO until 10-5) 2017/05/12 22:03:42 Could we move the timeout_callback_.Reset() and th
aschulman 2017/05/15 18:49:16 Done.
469
518 connection_->ReadMessage(BATTOR_MESSAGE_TYPE_SAMPLES); 470 connection_->ReadMessage(BATTOR_MESSAGE_TYPE_SAMPLES);
519 return; 471 return;
520 472
521 // The following actions are required for RecordClockSyncMarker: 473 // The following actions are required for RecordClockSyncMarker:
522 case Action::SEND_CURRENT_SAMPLE_REQUEST: 474 case Action::SEND_CURRENT_SAMPLE_REQUEST:
523 SendControlMessage(BATTOR_CONTROL_MESSAGE_TYPE_READ_SAMPLE_COUNT, 0, 0); 475 SendControlMessage(BATTOR_CONTROL_MESSAGE_TYPE_READ_SAMPLE_COUNT, 0, 0);
524 return; 476 return;
525 case Action::READ_CURRENT_SAMPLE: 477 case Action::READ_CURRENT_SAMPLE:
526 connection_->ReadMessage(BATTOR_MESSAGE_TYPE_CONTROL_ACK); 478 connection_->ReadMessage(BATTOR_MESSAGE_TYPE_CONTROL_ACK);
527 return; 479 return;
528 480
529 case Action::SEND_GIT_HASH_REQUEST: 481 case Action::SEND_GIT_HASH_REQUEST:
530 connection_->Flush();
rnephew (Reviews Here) 2017/05/09 15:04:05 Do we not need to flush the connections now with t
aschulman 2017/05/11 13:13:00 see reply in: : tools/battor_agent/battor_connecti
531 SendControlMessage( 482 SendControlMessage(
532 BATTOR_CONTROL_MESSAGE_TYPE_GET_FIRMWARE_GIT_HASH, 0, 0); 483 BATTOR_CONTROL_MESSAGE_TYPE_GET_FIRMWARE_GIT_HASH, 0, 0);
533 return; 484 return;
534 485
535 case Action::READ_GIT_HASH: 486 case Action::READ_GIT_HASH:
536 connection_->ReadMessage(BATTOR_MESSAGE_TYPE_CONTROL_ACK); 487 connection_->ReadMessage(BATTOR_MESSAGE_TYPE_CONTROL_ACK);
537 return; 488 return;
538 489
539 case Action::INVALID: 490 case Action::INVALID:
540 NOTREACHED(); 491 NOTREACHED();
492 return;
541 } 493 }
542 } 494 }
543 495
544 void BattOrAgent::PerformDelayedAction(Action action, base::TimeDelta delay) { 496 void BattOrAgent::PerformDelayedAction(Action action, base::TimeDelta delay) {
545 base::ThreadTaskRunnerHandle::Get()->PostDelayedTask( 497 base::ThreadTaskRunnerHandle::Get()->PostDelayedTask(
546 FROM_HERE, base::Bind(&BattOrAgent::PerformAction, AsWeakPtr(), action), 498 FROM_HERE, base::Bind(&BattOrAgent::PerformAction, AsWeakPtr(), action),
547 delay); 499 delay);
548 } 500 }
549 501
550 void BattOrAgent::OnActionTimeout() { 502 void BattOrAgent::OnActionTimeout() {
551 switch (last_action_) { 503 switch (last_action_) {
552 case Action::READ_INIT_ACK: 504 case Action::READ_INIT_ACK:
553 if (num_init_attempts_++ < kMaxInitAttempts) { 505 case Action::READ_SET_GAIN_ACK:
554 // OnMessageRead() will fail and retry SEND_INIT.
555 connection_->CancelReadMessage();
556 } else {
557 CompleteCommand(BATTOR_ERROR_TOO_MANY_INIT_RETRIES);
558 }
559 return;
560
561 // TODO(crbug.com/672631): There's currently a BattOr firmware bug that's
562 // causing the BattOr to reset when it's sent the START_TRACING command.
563 // When the BattOr resets, it emits 0x00 to the serial connection. This 0x00
564 // isn't long enough for the connection to consider it a full ack of the
565 // START_TRACING command, so it continues to wait for more data. We handle
566 // this case here by assuming any timeouts while waiting for the
567 // StartTracing ack are related to this bug and retrying the full
568 // initialization sequence.
569 case Action::READ_START_TRACING_ACK: 506 case Action::READ_START_TRACING_ACK:
570 if (num_start_tracing_attempts_ < kMaxStartTracingAttempts) { 507 case Action::READ_EEPROM:
571 // OnMessageRead() will fail and retry StartTracing. 508 case Action::READ_CALIBRATION_FRAME:
572 connection_->CancelReadMessage(); 509 case Action::READ_DATA_FRAME:
573 } else { 510 case Action::READ_GIT_HASH:
574 CompleteCommand(BATTOR_ERROR_TOO_MANY_START_TRACING_RETRIES); 511 connection_->CancelReadMessage();
575 }
576 return; 512 return;
577 513
578 default: 514 default:
579 CompleteCommand(BATTOR_ERROR_TIMEOUT); 515 CompleteCommand(BATTOR_ERROR_TIMEOUT);
580 timeout_callback_.Cancel(); 516 timeout_callback_.Cancel();
581 } 517 }
582 } 518 }
583 519
584 void BattOrAgent::SendControlMessage(BattOrControlMessageType type, 520 void BattOrAgent::SendControlMessage(BattOrControlMessageType type,
585 uint16_t param1, 521 uint16_t param1,
586 uint16_t param2) { 522 uint16_t param2) {
587 DCHECK(thread_checker_.CalledOnValidThread()); 523 DCHECK(thread_checker_.CalledOnValidThread());
588 524
525 // Start control message respone timeout timer.
charliea (OOO until 10-5) 2017/05/12 22:03:42 s/respone/response, but see above comment about fa
aschulman 2017/05/15 18:49:16 Done.
526 timeout_callback_.Reset(
527 base::Bind(&BattOrAgent::OnActionTimeout, AsWeakPtr()));
528 base::ThreadTaskRunnerHandle::Get()->PostDelayedTask(
529 FROM_HERE, timeout_callback_.callback(),
530 base::TimeDelta::FromSeconds(kBattOrControlMessageTimeoutSeconds));
531
589 BattOrControlMessage msg{type, param1, param2}; 532 BattOrControlMessage msg{type, param1, param2};
590 connection_->SendBytes(BATTOR_MESSAGE_TYPE_CONTROL, &msg, sizeof(msg)); 533 connection_->SendBytes(BATTOR_MESSAGE_TYPE_CONTROL, &msg, sizeof(msg));
591 } 534 }
592 535
536 void BattOrAgent::RetryCommand() {
537 if (++num_command_attempts_ >= kMaxCommandAttempts) {
538 CompleteCommand(BATTOR_ERROR_TOO_MANY_COMMAND_RETRIES);
539 return;
540 }
541
542 // Failed to read response to message, retry current command.
543 switch (command_) {
544 case Command::START_TRACING:
545 base::ThreadTaskRunnerHandle::Get()->PostDelayedTask(
charliea (OOO until 10-5) 2017/05/12 22:03:42 One thing that might make this a little cleaner is
aschulman 2017/05/15 18:49:16 Done.
546 FROM_HERE, base::Bind(&BattOrAgent::StartTracing, AsWeakPtr()),
547 base::TimeDelta::FromSeconds(kCommandRetryDelaySeconds));
548 return;
549 case Command::STOP_TRACING:
550 base::ThreadTaskRunnerHandle::Get()->PostDelayedTask(
551 FROM_HERE, base::Bind(&BattOrAgent::StopTracing, AsWeakPtr()),
552 base::TimeDelta::FromSeconds(kCommandRetryDelaySeconds));
553 return;
554 case Command::GET_FIRMWARE_GIT_HASH:
555 base::ThreadTaskRunnerHandle::Get()->PostDelayedTask(
556 FROM_HERE, base::Bind(&BattOrAgent::GetFirmwareGitHash, AsWeakPtr()),
557 base::TimeDelta::FromSeconds(kCommandRetryDelaySeconds));
558 return;
559 default:
560 NOTREACHED();
561 return;
562 }
563 }
564
593 void BattOrAgent::CompleteCommand(BattOrError error) { 565 void BattOrAgent::CompleteCommand(BattOrError error) {
594 switch (command_) { 566 switch (command_) {
595 case Command::START_TRACING: 567 case Command::START_TRACING:
596 base::ThreadTaskRunnerHandle::Get()->PostTask( 568 base::ThreadTaskRunnerHandle::Get()->PostTask(
597 FROM_HERE, base::Bind(&Listener::OnStartTracingComplete, 569 FROM_HERE, base::Bind(&Listener::OnStartTracingComplete,
598 base::Unretained(listener_), error)); 570 base::Unretained(listener_), error));
599 break; 571 break;
600 case Command::STOP_TRACING: 572 case Command::STOP_TRACING:
601 base::ThreadTaskRunnerHandle::Get()->PostTask( 573 base::ThreadTaskRunnerHandle::Get()->PostTask(
602 FROM_HERE, 574 FROM_HERE,
603 base::Bind(&Listener::OnStopTracingComplete, 575 base::Bind(&Listener::OnStopTracingComplete,
604 base::Unretained(listener_), SamplesToString(), error)); 576 base::Unretained(listener_), SamplesToString(), error));
605 break; 577 break;
606 case Command::RECORD_CLOCK_SYNC_MARKER: 578 case Command::RECORD_CLOCK_SYNC_MARKER:
607 base::ThreadTaskRunnerHandle::Get()->PostTask( 579 base::ThreadTaskRunnerHandle::Get()->PostTask(
608 FROM_HERE, base::Bind(&Listener::OnRecordClockSyncMarkerComplete, 580 FROM_HERE, base::Bind(&Listener::OnRecordClockSyncMarkerComplete,
609 base::Unretained(listener_), error)); 581 base::Unretained(listener_), error));
610 break; 582 break;
611 case Command::GET_FIRMWARE_GIT_HASH: 583 case Command::GET_FIRMWARE_GIT_HASH:
612 base::ThreadTaskRunnerHandle::Get()->PostTask( 584 base::ThreadTaskRunnerHandle::Get()->PostTask(
613 FROM_HERE, base::Bind(&Listener::OnGetFirmwareGitHashComplete, 585 FROM_HERE, base::Bind(&Listener::OnGetFirmwareGitHashComplete,
614 base::Unretained(listener_), 586 base::Unretained(listener_),
615 firmware_git_hash_, error)); 587 firmware_git_hash_, error));
616 break; 588 break;
617 case Command::INVALID: 589 case Command::INVALID:
618 NOTREACHED(); 590 NOTREACHED();
591 return;
619 } 592 }
620 593
621 last_action_ = Action::INVALID; 594 last_action_ = Action::INVALID;
622 command_ = Command::INVALID; 595 command_ = Command::INVALID;
623 pending_clock_sync_marker_.clear(); 596 pending_clock_sync_marker_.clear();
624 battor_eeprom_.reset(); 597 battor_eeprom_.reset();
625 calibration_frame_.clear(); 598 calibration_frame_.clear();
626 samples_.clear(); 599 samples_.clear();
627 next_sequence_number_ = 0; 600 next_sequence_number_ = 0;
601 num_command_attempts_ = 0;
628 } 602 }
629 603
630 std::string BattOrAgent::SamplesToString() { 604 std::string BattOrAgent::SamplesToString() {
631 if (calibration_frame_.empty() || samples_.empty() || !battor_eeprom_) 605 if (calibration_frame_.empty() || samples_.empty() || !battor_eeprom_)
632 return ""; 606 return "";
633 607
634 BattOrSampleConverter converter(*battor_eeprom_, calibration_frame_); 608 BattOrSampleConverter converter(*battor_eeprom_, calibration_frame_);
635 609
636 std::stringstream trace_stream; 610 std::stringstream trace_stream;
637 trace_stream << std::fixed; 611 trace_stream << std::fixed;
(...skipping 23 matching lines...) Expand all
661 if (clock_sync_marker != clock_sync_markers_.end()) 635 if (clock_sync_marker != clock_sync_markers_.end())
662 trace_stream << " <" << clock_sync_marker->second << ">"; 636 trace_stream << " <" << clock_sync_marker->second << ">";
663 637
664 trace_stream << std::endl; 638 trace_stream << std::endl;
665 } 639 }
666 640
667 return trace_stream.str(); 641 return trace_stream.str();
668 } 642 }
669 643
670 } // namespace battor 644 } // namespace battor
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698