Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(275)

Side by Side Diff: components/translate/core/browser/ranker_model_loader.cc

Issue 2565873002: [translate] Add translate ranker model loader. (Closed)
Patch Set: for asan testing only - DO NOT COMMIT Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/translate/core/browser/ranker_model_loader.h"
6
7 #include "base/bind.h"
8 #include "base/bind_helpers.h"
9 #include "base/command_line.h"
10 #include "base/files/file_path.h"
11 #include "base/files/file_util.h"
12 #include "base/files/important_file_writer.h"
13 #include "base/macros.h"
14 #include "base/memory/ptr_util.h"
15 #include "base/memory/ref_counted.h"
16 #include "base/metrics/histogram_macros.h"
17 #include "base/profiler/scoped_tracker.h"
18 #include "base/strings/string_util.h"
19 #include "base/task_scheduler/post_task.h"
20 #include "base/threading/sequenced_task_runner_handle.h"
21 #include "components/translate/core/browser/proto/ranker_model.pb.h"
22 #include "components/translate/core/browser/translate_url_fetcher.h"
23 #include "components/translate/core/common/translate_switches.h"
24 #include "components/variations/variations_associated_data.h"
25
26 namespace translate {
27
28 namespace {
29
30 const int kUrlFetcherId = 2;
groby-ooo-7-16 2017/02/14 20:56:47 Why do we hardcode a fetcher ID? I've only seen th
fdoray 2017/02/15 20:07:40 constexpr here and below
Roger McFarlane (Chromium) 2017/02/21 22:16:49 I followed the other examples of the TranslateURLF
Roger McFarlane (Chromium) 2017/02/21 22:16:49 Done.
31
32 // The maximum number of model download attempts to make. Download may fail
33 // due to server error or network availability issues.
34 const int kMaxRetryOn5xx = 8;
35
36 // The minimum duration, in minutes, between download attempts.
37 const int kDownloadRefractoryPeriodMin = 3;
groby-ooo-7-16 2017/02/14 20:56:47 1) since this is a duration, I'd love for this to
Roger McFarlane (Chromium) 2017/02/21 22:16:48 I'll make this an input param (via an options clas
38
39 // Suffixes for the various histograms produced by the backend.
40 const char kWriteTimerHistogram[] = ".Timer.WriteModel";
41 const char kReadTimerHistogram[] = ".Timer.ReadModel";
42 const char kDownloadTimerHistogram[] = ".Timer.DownloadModel";
43 const char kParsetimerHistogram[] = ".Timer.ParseModel";
44 const char kModelStatusHistogram[] = ".Model.Status";
45
46 // A helper class to produce a scoped timer histogram that supports using a
47 // non-static-const name.
48 class MyScopedHistogramTimer {
49 public:
50 MyScopedHistogramTimer(const base::StringPiece& name)
groby-ooo-7-16 2017/02/14 20:56:48 FWIW: I take it the UMA peeps are OK with dynamica
Roger McFarlane (Chromium) 2017/02/21 22:16:50 I got this recipe from the UMA folks, yes. I'll a
51 : name_(name.begin(), name.end()), start_(base::TimeTicks::Now()) {}
52
53 ~MyScopedHistogramTimer() {
54 base::TimeDelta duration = base::TimeTicks::Now() - start_;
55 base::HistogramBase* counter = base::Histogram::FactoryTimeGet(
56 name_, base::TimeDelta::FromMilliseconds(10),
57 base::TimeDelta::FromMilliseconds(200000), 100,
58 base::HistogramBase::kUmaTargetedHistogramFlag);
59 if (counter)
60 counter->AddTime(duration);
61 }
62
63 private:
64 const std::string name_;
65 const base::TimeTicks start_;
66
67 DISALLOW_COPY_AND_ASSIGN(MyScopedHistogramTimer);
68 };
69
70 bool IsExpired(const chrome_intelligence::RankerModel& model) {
groby-ooo-7-16 2017/02/14 20:56:47 Should this be a member on the model?
Roger McFarlane (Chromium) 2017/02/21 22:16:49 Done.
71 DCHECK(model.has_metadata());
72 const auto& metadata = model.metadata();
73
74 // If the age of the model cannot be determined, presume it to be expired.
75 if (!metadata.has_last_modified_sec())
76 return true;
77
78 // If the model has no set cache duration, then it never expires.
79 if (!metadata.has_cache_duration_sec() || metadata.cache_duration_sec() == 0)
80 return false;
81
82 // Otherwise, a model is expired if its age exceeds the cache duration.
83 base::Time last_modified =
84 base::Time() + base::TimeDelta::FromSeconds(metadata.last_modified_sec());
85 base::TimeDelta age = base::Time::Now() - last_modified;
86 return age > base::TimeDelta::FromSeconds(metadata.cache_duration_sec());
87 }
88
89 } // namespace
90
91 class RankerModelLoader::Backend {
92 public:
93 Backend(const ValidateCallback& validate_callback,
94 const OnModelAvailableCallback& on_model_available_callback,
95 const base::FilePath& model_path,
96 const GURL& model_url,
97 const std::string& uma_prefix);
98 ~Backend();
99
100 // Reads the model from |model_path_|.
101 void LoadFromFile();
102
103 // Reads the model |model_url_|.
104 void LoadFromURL();
105
106 private:
107 // Log the result of loading a model to UMA.
108 void ReportModelStatus(RankerModelStatus model_status);
109
110 // Called to construct a model from the given |data|.
groby-ooo-7-16 2017/02/14 20:56:48 nit: Please don't use passive voice: "Constructs a
Roger McFarlane (Chromium) 2017/02/21 22:16:49 Done.
111 std::unique_ptr<chrome_intelligence::RankerModel> Parse(
112 const std::string& data);
113
114 // Called the download initiated by LoadFromURL() completes.
groby-ooo-7-16 2017/02/14 20:56:47 Would you mind documenting |id|?
Roger McFarlane (Chromium) 2017/02/21 22:16:49 Done.
115 void OnDownloadComplete(int id, bool success, const std::string& data);
116
117 // Transfer ownership of |model| to the |observer_|.
hamelphi 2017/02/13 19:18:58 |observer_| is not defined in this class. Do you m
Roger McFarlane (Chromium) 2017/02/21 22:16:49 I updated the comment and method names to refer to
118 void TransferModelToObserver(
119 std::unique_ptr<chrome_intelligence::RankerModel> model);
120
121 // Validates that ranker model loader tasks are all performed on the correct
122 // sequence.
123 base::SequenceChecker sequence_checker_;
124
125 // The TaskRunner on which this was constructed.
groby-ooo-7-16 2017/02/14 20:56:48 nit: "On which |this| was constructed"
Roger McFarlane (Chromium) 2017/02/21 22:16:49 Done.
126 const scoped_refptr<base::SequencedTaskRunner> origin_task_runner_;
127
128 // Validates a ranker model on behalf of the model loader client. This may
129 // be called on any sequence and must, therefore, be thread-safe.
130 const ValidateCallback validate_callback_;
131
132 // Transfers ownership of a loaded model back to the model loader client.
133 // This will be called on the sequence on which the model loader was
134 // constructed.
135 const OnModelAvailableCallback on_model_available_callback_;
136
137 // The path at which the model is (or should be) cached.
138 const base::FilePath model_path_;
139
140 // The URL from which to download the model if the model is not in the cache
141 // or the cached model is invalid/expired.
142 const GURL model_url_;
143
144 // This will prefix all UMA metrics generated by the model loader.
145 const std::string uma_prefix_;
146
147 // Used to download model data from |model_url_|.
148 // TODO(rogerm): Use net::URLFetcher directly?
149 std::unique_ptr<TranslateURLFetcher> url_fetcher_;
150
151 // The next time before which no new attempts to download the model should be
152 // attempted.
153 base::TimeTicks next_earliest_download_time_;
groby-ooo-7-16 2017/02/14 20:56:47 Why not just use a timer to schedule the next atte
Roger McFarlane (Chromium) 2017/02/21 22:16:49 I'm actually using this a throttle, not a deadline
154
155 // Tracks the last time of the last attempt to download a model. Used for UMA
156 // reporting of download duration.
157 base::TimeTicks download_start_time_;
158
159 DISALLOW_COPY_AND_ASSIGN(Backend);
160 };
161
162 RankerModelLoader::RankerModelLoader(
163 const ValidateCallback& validate_callback,
164 const OnModelAvailableCallback& on_model_available_callback,
165 const base::FilePath& model_path,
166 const GURL& model_url,
167 const std::string& uma_prefix)
168 : backend_task_runner_(base::CreateSequencedTaskRunnerWithTraits(
169 base::TaskTraits().MayBlock())),
fdoray 2017/02/15 20:07:40 Backend doesn't access memory that could be freed
Roger McFarlane (Chromium) 2017/02/21 22:16:50 Done. The backend forwards downloaded models to t
170 backend_(base::MakeUnique<Backend>(validate_callback,
171 on_model_available_callback,
172 model_path,
173 model_url,
174 uma_prefix)) {}
175
176 RankerModelLoader::~RankerModelLoader() {
177 // This is guaranteed to be sequenced after any pending backend operation.
178 backend_task_runner_->DeleteSoon(FROM_HERE, backend_.release());
179 }
180
181 void RankerModelLoader::Start() {
182 backend_task_runner_->PostTask(
183 FROM_HERE, base::Bind(&RankerModelLoader::Backend::LoadFromFile,
184 base::Unretained(backend_.get())));
185 }
186
187 void RankerModelLoader::NotifyOfNetworkAvailability() {
188 backend_task_runner_->PostTask(
189 FROM_HERE, base::Bind(&RankerModelLoader::Backend::LoadFromURL,
190 base::Unretained(backend_.get())));
191 }
192
groby-ooo-7-16 2017/02/14 20:56:48 Maybe at least a double line separator to make it
Roger McFarlane (Chromium) 2017/02/21 22:16:48 Done.
193 RankerModelLoader::Backend::Backend(
194 const ValidateCallback& validate_callback,
195 const OnModelAvailableCallback& on_model_available_callback,
196 const base::FilePath& model_path,
197 const GURL& model_url,
198 const std::string& uma_prefix)
199 : origin_task_runner_(base::SequencedTaskRunnerHandle::Get()),
200 validate_callback_(validate_callback),
201 on_model_available_callback_(on_model_available_callback),
202 model_path_(model_path),
203 model_url_(model_url),
204 uma_prefix_(uma_prefix) {
205 sequence_checker_.DetachFromSequence();
206 }
207
208 RankerModelLoader::Backend::~Backend() = default;
groby-ooo-7-16 2017/02/14 20:56:47 curious - why RankerModelLoader::Backend::~B
Roger McFarlane (Chromium) 2017/02/21 22:16:48 they're equivalent, so no particular reason.
209
210 void RankerModelLoader::Backend::ReportModelStatus(
211 RankerModelStatus model_status) {
212 base::HistogramBase* histogram = base::LinearHistogram::FactoryGet(
213 uma_prefix_ + kModelStatusHistogram, 1,
214 static_cast<int>(RankerModelStatus::MAX),
215 static_cast<int>(RankerModelStatus::MAX) + 1,
216 base::HistogramBase::kUmaTargetedHistogramFlag);
217 if (histogram)
218 histogram->Add(static_cast<int>(model_status));
219 }
220
221 std::unique_ptr<chrome_intelligence::RankerModel>
222 RankerModelLoader::Backend::Parse(const std::string& data) {
223 DCHECK(sequence_checker_.CalledOnValidSequence());
224 MyScopedHistogramTimer timer(uma_prefix_ + kParsetimerHistogram);
225
226 auto model = base::MakeUnique<chrome_intelligence::RankerModel>();
227
228 if (!model->ParseFromString(data)) {
229 ReportModelStatus(RankerModelStatus::PARSE_FAILED);
230 return nullptr;
231 }
232
233 RankerModelStatus model_status = validate_callback_.Run(*model);
groby-ooo-7-16 2017/02/14 20:56:48 Thought: If the callback took a ptr, and you had a
Roger McFarlane (Chromium) 2017/02/21 22:16:49 Not quite. I might have a non-null, but invalid mo
234 ReportModelStatus(model_status);
235 if (model_status != RankerModelStatus::OK)
236 return nullptr;
237
238 return model;
239 }
240
241 void RankerModelLoader::Backend::LoadFromFile() {
242 DCHECK(sequence_checker_.CalledOnValidSequence());
243 // Attempt to read the model data from the cache file.
244 std::string data;
245 if (!model_path_.empty()) {
246 DVLOG(2) << "Attempting to load model from: " << model_path_.value();
247 MyScopedHistogramTimer timer(uma_prefix_ + kReadTimerHistogram);
248 if (!base::ReadFileToString(model_path_, &data)) {
249 DVLOG(2) << "Failed to read model from: " << model_path_.value();
250 data.clear();
251 }
252 }
253
254 // If the model was successfully was read and is compatible, then notify
255 // the "owner" of this model loader of the models availability (transferring
256 // ownership of the model). If the model originated at the model_url_ and has
257 // not expired then there is no need to download a new one.
258 if (!data.empty()) {
259 std::unique_ptr<chrome_intelligence::RankerModel> model = Parse(data);
260 if (model) {
261 std::string url_spec = model->metadata().source();
262 bool is_expired = IsExpired(*model);
263 DVLOG(2) << (is_expired ? "Expired m" : "M") << "odel in '"
264 << model_path_.value() << "'' was originally downloaded from '"
265 << url_spec << "'.";
266
267 TransferModelToObserver(std::move(model));
268
269 if (url_spec == model_url_.spec() && !is_expired)
hamelphi 2017/02/13 19:18:58 Maybe comment on what it means if either condition
Roger McFarlane (Chromium) 2017/02/21 22:16:49 Done.
270 return;
271 }
272 }
273
274 // Reaching this point means that a model download is required. If there is
275 // no download URL configured, then there is nothing further to do.
276 if (!model_url_.is_valid())
277 return;
278
279 // Otherwise, initialize the model fetcher to be non-null and trigger an
280 // initial download attempt.
281 url_fetcher_ = base::MakeUnique<TranslateURLFetcher>(kUrlFetcherId);
282 url_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx);
283 LoadFromURL();
groby-ooo-7-16 2017/02/14 20:56:48 FWIW, LoadFromURL and LoadFromFile have slightly a
Roger McFarlane (Chromium) 2017/02/21 22:16:50 I made them a bit more symmetric. url_fetcher_ ==
284 }
285
286 void RankerModelLoader::Backend::LoadFromURL() {
287 DCHECK(sequence_checker_.CalledOnValidSequence());
288 // Immediately exit if there is no download pending.
289 if (!url_fetcher_)
290 return;
291
292 // If a request is already in flight, do not issue a new one.
groby-ooo-7-16 2017/02/14 20:56:47 IIUC, we only need this because we're currently us
Roger McFarlane (Chromium) 2017/02/21 22:16:49 By transition-based approach, you mean watching th
293 if (url_fetcher_->state() == TranslateURLFetcher::REQUESTING) {
294 DVLOG(2) << "RankerModelLoader: Download is in progress.";
295 return;
296 }
297 // Do nothing if the download attempts should be throttled.
298 if (base::TimeTicks::Now() < next_earliest_download_time_) {
299 DVLOG(2) << "ModelLoadser: Last download attempt was too recent.";
hamelphi 2017/02/13 19:18:58 s\ModelLoadser\ModelLoader
Roger McFarlane (Chromium) 2017/02/21 22:16:50 Done.
300 return;
301 }
302
303 DVLOG(2) << "Downloading model from: " << model_url_;
304
305 // Reset the time of the next earliest allowable download attempt.
306 next_earliest_download_time_ =
307 base::TimeTicks::Now() +
308 base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin);
309
310 // Kick off the next download attempt.
311 download_start_time_ = base::TimeTicks::Now();
312 bool result = url_fetcher_->Request(
313 model_url_, base::Bind(&RankerModelLoader::Backend::OnDownloadComplete,
314 base::Unretained(this)));
315
316 // The maximum number of download attempts has been surpassed. Don't make
317 // any further attempts.
318 if (!result) {
319 DVLOG(2) << "Model download abandoned.";
320 ReportModelStatus(RankerModelStatus::DOWNLOAD_FAILED);
321 url_fetcher_.reset();
322 }
323 }
324
325 void RankerModelLoader::Backend::OnDownloadComplete(int /* id */,
326 bool success,
327 const std::string& data) {
328 DCHECK(sequence_checker_.CalledOnValidSequence());
329
330 // Record the duration of the download.
331 base::TimeDelta duration = base::TimeTicks::Now() - download_start_time_;
332 base::HistogramBase* counter = base::Histogram::FactoryTimeGet(
333 uma_prefix_ + kDownloadTimerHistogram,
334 base::TimeDelta::FromMilliseconds(10),
335 base::TimeDelta::FromMilliseconds(200000), 100,
336 base::HistogramBase::kUmaTargetedHistogramFlag);
337 if (counter)
338 counter->AddTime(duration);
339
340 // On failure, we just abort. The TranslateRanker will retry on a subsequent
341 // translation opportunity. The TranslateURLFetcher enforces a limit for
groby-ooo-7-16 2017/02/14 20:56:48 Should we instead retry once network is available?
Roger McFarlane (Chromium) 2017/02/21 22:16:49 See my previous comments/thoughts/questions about
342 // retried requests.
343 if (!success || data.empty()) {
344 DVLOG(2) << "Download from '" << model_url_ << "'' failed.";
345 return;
346 }
347
348 auto model = Parse(data);
349 if (!model) {
350 DVLOG(2) << "Model from '" << model_url_ << "'' not valid.";
351 return;
352 }
353
354 url_fetcher_.reset();
355
356 auto* metadata = model->mutable_metadata();
357 metadata->set_source(model_url_.spec());
358 metadata->set_last_modified_sec(
359 (base::Time::Now() - base::Time()).InSeconds());
360
361 if (!model_path_.empty()) {
362 DVLOG(2) << "Saving model from '" << model_url_ << "'' to '"
363 << model_path_.value() << "'.";
364 MyScopedHistogramTimer timer(uma_prefix_ + kWriteTimerHistogram);
365 base::ImportantFileWriter::WriteFileAtomically(model_path_,
366 model->SerializeAsString());
367 }
368
369 // Notify the owner that a compatible model is available.
370 TransferModelToObserver(std::move(model));
371 }
372
373 void RankerModelLoader::Backend::TransferModelToObserver(
374 std::unique_ptr<chrome_intelligence::RankerModel> model) {
375 DCHECK(sequence_checker_.CalledOnValidSequence());
376 origin_task_runner_->PostTask(
377 FROM_HERE,
378 base::Bind(on_model_available_callback_, base::Passed(std::move(model))));
379 }
380
381 } // namespace translate
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698