Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(339)

Side by Side Diff: components/translate/core/browser/translate_ranker_model_loader.h

Issue 2565873002: [translate] Add translate ranker model loader. (Closed)
Patch Set: Initial CL Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef COMPONENTS_TRANSLATE_CORE_BROWSER_TRANSLATE_RANKER_MODEL_LOADER_H_
6 #define COMPONENTS_TRANSLATE_CORE_BROWSER_TRANSLATE_RANKER_MODEL_LOADER_H_
7
8 #include <memory>
9 #include <string>
10
11 #include "base/bind.h"
12 #include "base/bind_helpers.h"
13 #include "base/files/file_util.h"
14 #include "base/files/important_file_writer.h"
15 #include "base/gtest_prod_util.h"
16 #include "base/memory/ptr_util.h"
17 #include "base/memory/ref_counted.h"
18 #include "base/metrics/histogram_macros.h"
19 #include "base/profiler/scoped_tracker.h"
20 #include "base/single_thread_task_runner.h"
21 #include "base/strings/string_util.h"
22 #include "base/synchronization/lock.h"
23 #include "base/task_runner.h"
24 #include "base/task_scheduler/post_task.h"
25 #include "base/task_scheduler/task_traits.h"
26 #include "components/translate/core/browser/proto/translate_ranker_model.pb.h"
27 #include "components/translate/core/browser/translate_url_fetcher.h"
28
29 namespace translate {
30
31 // Loads a model of type T::ModelType from a cache file and/or a download URL,
32 // and notifies the caller via a callback when a compatible model is available.
33 template <typename T>
34 class ModelLoader {
35 public:
36 typedef typename T::ModelType ModelType;
gab 2016/12/19 21:00:46 using T::ModelType; should do the trick I think.
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Acknowledged.
37
38 // A callback used by the model loader to determine whether a loaded model
39 // is suitable/valid by some measure. May be called multiple times.
40 typedef base::RepeatingCallback<bool(const ModelType&)> IsValidFunc;
41
42 // Called with a non-null model unique_ptr when the loader has successfully
43 // loaded a compatible model, or with a null unique_ptr if the loader has
44 // failed to load a model after exhausting its allotted retry attempts. May be
45 // called multiple times if the cached model is compatible but out of date:
46 // once when the compatible cached model becomes available, and once again
47 // after downloading and validating an up-to-date model.
48 typedef base::RepeatingCallback<void(std::unique_ptr<ModelType>)>
49 OnAvailableFunc;
gab 2016/12/19 21:00:46 As of C++11, using OnAvailableCallback = base
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Acknowledged.
50
51 ModelLoader();
52 ~ModelLoader() = default;
gab 2016/12/19 21:00:46 Destructors should be defined out of line for clas
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Done.
53
54 // Sets the file path from which to load the cached model. This path will also
55 // be used to store a more up-to-date model if available.
56 ModelLoader& set_cache_file_path(const base::FilePath& cache_file_path) {
57 cache_file_path_ = cache_file_path;
58 return *this;
59 }
60
61 // Sets the URL from which to download the model. This URL will be used if
62 // there is no cached model or if the cached model is not up-to-date.
63 ModelLoader& set_download_url(const GURL& download_url) {
64 download_url_ = download_url;
65 return *this;
66 }
67
68 // Sets the callback that the model loader will use to validate that a given
69 // model is compatible with the caller's needs.
70 ModelLoader& set_is_compatible_func(IsValidFunc f) {
pasko 2016/12/19 14:26:49 why is it necessary to inject various functions li
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Moved this functionality to a ModelObserver interf
71 is_compatible_func_ = f;
72 return *this;
73 }
74
75 // Sets the callback that the model loader will use to validate that a given
76 // model is sufficiently up-to-date per the caller's expectations. For example,
77 // if the caller knows out-of-band the version of the most recent model. By
78 // implication, version information is expected to be embedded in the model.
79 ModelLoader& set_is_up_to_date_func(IsValidFunc f) {
80 is_up_to_date_func_ = f;
81 return *this;
82 }
83
84 // Sets the callback that the model loader will use to notify the caller that
85 // a compatible model is available.
86 ModelLoader& set_on_model_available_func(OnAvailableFunc callback) {
87 on_model_available_func_ = callback;
88 return *this;
89 }
90
91 // Asynchronously initiates loading the model from the cache file path and URL
92 // previously configured.
93 void Start();
94
95 // Call this method periodically to notify the downloader that translate is
96 // being used. This is used as a proxy notification for network activity.
97 // If a model download is pending, this will trigger (subject to retry and
98 // frequency limits) the download.
99 void NotifyOfTranslateEvent();
100
101 private:
102 // Enumeration denoting the outcome of an attempt to download the model. This
103 // must be kept in sync with the TranslateRankerModelStatus enum in
104 // histograms.xml.
105 // TODO(rogerm): rename the enum in histograms.xml to be more generic
106 enum ModelStatus {
107 MODEL_STATUS_OK = 0,
108 MODEL_STATUS_DOWNLOAD_THROTTLED = 1,
109 MODEL_STATUS_DOWNLOAD_FAILED = 2,
110 MODEL_STATUS_PARSE_FAILED = 3,
111 MODEL_STATUS_VALIDATION_FAILED = 4,
112 // Insert new values above this line.
113 MODEL_STATUS_MAX
114 };
115
116 // The maximum number of model download attempts to make. Download may fail
117 // due to server error or network availability issues.
118 const int kMaxRetryOn5xx = 8;
119
120 // The minimum duration, in minutes, between download attempts.
121 const int kDownloadRefractoryPeriodMin = 3;
122
123 // Log the result of loading a model to UMA.
124 void ReportModelStatus(ModelStatus model_status);
125
126 // Called to construct a model from the given |data|.
127 std::unique_ptr<ModelType> Parse(const std::string& data);
128
129 // Reads the model from cache and/or kicks off a model download.
gab 2016/12/19 21:00:46 s/Task functor to read .../Reads.../ ?
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Done.
130 void LoadData();
131
132 // Called when the background task to download the model from |download_url_|
133 // has completed.
134 void OnDownloadComplete(int id, bool success, const std::string& data);
135
136 // Writes |data| to the model's cache file path.
137 void SaveData(const std::string& data);
138
139 // The prefix to prepend to all UMA metrics generated by this loader.
140 const std::string uma_prefix_;
141
142 // Used to protect the creation/destruction of the fetcher.
143 base::Lock lock_;
144
145 // The task runner with which to perform background IO to read, download and
146 // cache the model. This must be a SingleThreadTaskRunner due to legacy
147 // requirements of the URLFetcher.
fdoray 2016/12/19 15:18:29 // TODO(fdoray): Make this a SequencedTaskRunner o
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Acknowledged.
148 scoped_refptr<base::SingleThreadTaskRunner> task_runner_;
149
150 // Used to download model data from |download_url_|.
151 // TODO(rogerm): Use net::URLFetcher directly?
152 std::unique_ptr<TranslateURLFetcher> url_fetcher_;
153
154 // The earliest time at which a new attempt to download the model may be
155 // made.
156 base::Time next_earliest_download_time_;
157
158 // Tracks the start time of the last attempt to download a model. Used for UMA
159 // reporting of download duration.
160 base::Time download_start_time_;
161
162 // The path at which the model is (or should be) cached.
163 base::FilePath cache_file_path_;
164
165 // The URL from which to download the model if the model is not in the cache
166 // or the cached model is invalid/expired.
167 GURL download_url_;
168
169 // Callback used to check if a model is compatible with the caller.
gab 2016/12/19 21:00:46 nit: s/Functor/Callback/ (and below)
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Acknowledged.
170 IsValidFunc is_compatible_func_;
171
172 // Callback used to check if a model is up-to-date.
173 IsValidFunc is_up_to_date_func_;
174
175 // Callback used to notify the caller on the availability of a compatible
176 // model.
177 OnAvailableFunc on_model_available_func_;
178
179 DISALLOW_COPY_AND_ASSIGN(ModelLoader);
180 };
181
// Creates |task_runner_| with background priority, skip-on-shutdown behavior,
// and the wait and file-IO traits. All other members are default-initialized.
182 template <typename T>
183 ModelLoader<T>::ModelLoader()
184 : task_runner_(base::CreateSingleThreadTaskRunnerWithTraits(
185 base::TaskTraits()
186 .WithPriority(base::TaskPriority::BACKGROUND)
187 .WithShutdownBehavior(
188 base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN)
189 .WithWait()
190 .WithFileIO())) {}
191
// Posts LoadData() to |task_runner_|. |this| is bound unretained, so the
// loader must still be alive when the posted task runs.
192 template <typename T>
193 void ModelLoader<T>::Start() {
194 task_runner_->PostTask(
195 FROM_HERE, base::Bind(&ModelLoader<T>::LoadData, base::Unretained(this)));
pasko 2016/12/19 14:26:49 base::Unretained(this): what ensures that this mod
Roger McFarlane (Chromium) 2017/02/08 23:08:08 The traits of the task runner are to skip running
196 }
197
198 template <typename T>
199 void ModelLoader<T>::NotifyOfTranslateEvent() {
pasko 2016/12/19 14:26:49 please consider moving the implementation to the .
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Switched to a fixed ranker model proto instead of
200 // Immediate exit if no download is pending.
// NOTE(review): this first check reads |url_fetcher_| without holding |lock_|.
201 if (!url_fetcher_)
202 return;
203
204 // Serialize calls to the rest of this method.
205 base::AutoLock auto_lock(lock_);
206
207 // Validate that a download is still pending.
208 if (!url_fetcher_)
209 return;
210
211 // If a request is already in flight, do not issue a new one.
212 if (url_fetcher_->state() == TranslateURLFetcher::REQUESTING) {
213 DVLOG(2) << "ModelLoader: Download is in progress.";
214 return;
215 }
216 // Do nothing if the download attempts should be throttled.
217 if (base::Time::NowFromSystemTime() < next_earliest_download_time_) {
218 DVLOG(2) << "TranslateRanker: Last download attempt was too recent.";
219 return;
220 }
221
222 DVLOG(2) << "Downloading model from: " << download_url_;
223
224 // Reset the time of the next earliest allowable download attempt.
225 next_earliest_download_time_ =
226 base::Time::NowFromSystemTime() +
227 base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin);
gab 2016/12/19 21:00:46 Use TimeTicks to compute any time difference, time
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Done.
228
229 // Kick off the next download attempt.
230 download_start_time_ = base::Time::Now();
231 bool result = url_fetcher_->Request(
232 download_url_,
233 base::Bind(&ModelLoader<T>::OnDownloadComplete, base::Unretained(this)));
pasko 2016/12/19 14:26:49 what guarantees that this callback does not get ru
Roger McFarlane (Chromium) 2017/02/08 23:08:08 It's on the current threads task runner. Which is
234
235 // The maximum number of download attempts has been surpassed. Don't make
236 // any further attempts.
237 if (!result) {
238 DVLOG(2) << "Model download abandoned.";
239 ReportModelStatus(MODEL_STATUS_DOWNLOAD_FAILED);
240 url_fetcher_.reset();
241 }
242 }
243
// Records |model_status| in the enumeration histogram named by
// T::kModelStatusHistogram, with MODEL_STATUS_MAX as the boundary value.
244 template <typename T>
245 void ModelLoader<T>::ReportModelStatus(
246 typename ModelLoader<T>::ModelStatus model_status) {
247 UMA_HISTOGRAM_ENUMERATION(T::kModelStatusHistogram, model_status,
248 MODEL_STATUS_MAX);
249 }
250
// Parses |data| into a newly allocated ModelType and checks it with
// |is_compatible_func_|. Returns the model on success, or nullptr if parsing
// fails or the model is reported as incompatible. The outcome is logged to UMA
// via ReportModelStatus() and the call is timed with T::kParsetimerHistogram.
251 template <typename T>
252 std::unique_ptr<typename T::ModelType> ModelLoader<T>::Parse(
253 const std::string& data) {
254 SCOPED_UMA_HISTOGRAM_TIMER(T::kParsetimerHistogram);
255
// Instantiate the traits' model type rather than a hard-coded proto so the
// template works for any T whose ModelType provides ParseFromString().
256 auto model = base::MakeUnique<ModelType>();
257
258 if (!model->ParseFromString(data)) {
259 ReportModelStatus(MODEL_STATUS_PARSE_FAILED);
260 return nullptr;
261 }
262
263 if (!is_compatible_func_.Run(*model)) {
264 ReportModelStatus(MODEL_STATUS_VALIDATION_FAILED);
265 return nullptr;
266 }
267
268 ReportModelStatus(MODEL_STATUS_OK);
269 return model;
270 }
271
272 template <typename T>
273 void ModelLoader<T>::LoadData() {
274 // Attempt to read the model data from the cache file.
275 std::string data;
276 if (!cache_file_path_.empty()) {
277 DVLOG(2) << "Loading model from: " << cache_file_path_.value();
278 SCOPED_UMA_HISTOGRAM_TIMER(T::kReadTimerHistogram);
279 if (!base::ReadFileToString(cache_file_path_, &data))
280 data.clear();
281 }
282
283 // If the model was successfully read and is compatible, then notify
284 // the "owner" of this model loader of the model's availability (transferring
285 // ownership of the model). If the model is, further, up to date, then there
286 // is no further work to be done.
287 if (!data.empty()) {
288 std::unique_ptr<ModelType> model = Parse(data);
289 if (model) {
290 bool is_up_to_date = is_up_to_date_func_.Run(*model);
291 on_model_available_func_.Run(std::move(model));
gab 2016/12/19 21:00:46 From the API it wasn't obvious to me that this cal
Roger McFarlane (Chromium) 2017/02/08 23:08:08 I've switched the API to an observer model, where
292 if (is_up_to_date)
293 return;
294 }
295 }
296
297 // Reaching this point means that a model download is required. If there is
298 // no download URL configured, then there is nothing further to do.
299 if (!download_url_.is_valid())
300 return;
301
302 // Otherwise, initialize the model fetcher to be non-null and trigger an
303 // initial download attempt. NOTE(review): |lock_| is not held while the
// fetcher is created here.
gab 2016/12/19 21:00:46 nit: itempt?
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Done.
304 url_fetcher_.reset(new TranslateURLFetcher(T::kFetcherId));
305 url_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx);
306 NotifyOfTranslateEvent();
307 }
308
309 template <typename T>
310 void ModelLoader<T>::OnDownloadComplete(int /* id */,
311 bool success,
312 const std::string& data) {
313 UMA_HISTOGRAM_MEDIUM_TIMES(T::kDownloadTimerHistogram,
314 base::Time::Now() - download_start_time_);
fdoray 2016/12/19 15:18:29 Use TimeTicks instead of Time to compute the the a
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Done.
315
316 // On failure, we just abort. The TranslateRanker will retry on a subsequent
317 // translation opportunity. The TranslateURLFetcher enforces a limit for
318 // retried requests.
319 if (!success)
320 return;
321
322 auto model = Parse(data);
323 if (!model)
324 return;
325
326 // Do we have the most recent model? Check now, before transferring ownership
327 // of the model away from this method.
328 bool is_up_to_date = is_up_to_date_func_.Run(*model);
329
330 // Notify the owner that a compatible model is available.
331 on_model_available_func_.Run(std::move(model));
332
333 // If the model is the most recent, cache it and discontinue download attempts.
334 if (is_up_to_date) {
335 if (!cache_file_path_.empty()) {
336 task_runner_->PostTask(
337 FROM_HERE,
338 base::Bind(&ModelLoader<T>::SaveData, base::Unretained(this), data));
339 }
340 base::AutoLock auto_lock(lock_);
341 url_fetcher_.reset();
342 }
343 }
344
// Writes |data| to |cache_file_path_|, which must be non-empty, using
// base::ImportantFileWriter::WriteFileAtomically. The write is timed with
// T::kWriteTimerHistogram.
345 template <typename T>
346 void ModelLoader<T>::SaveData(const std::string& data) {
347 DCHECK(!cache_file_path_.empty());
348 SCOPED_UMA_HISTOGRAM_TIMER(T::kWriteTimerHistogram);
349 base::ImportantFileWriter::WriteFileAtomically(cache_file_path_, data);
350 }
351
// Traits type supplying ModelLoader with the TranslateRankerModel proto as its
// ModelType, plus the fetcher id and histogram names the loader refers to.
// The constants are only declared here; their definitions are not in this header.
352 class TranslateRankerModelTraits {
pasko 2016/12/19 14:26:49 Do you have plans to add more traits? If not, plea
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Done.
353 public:
354 typedef typename chrome_intelligence::TranslateRankerModel ModelType;
355 static const int kFetcherId;
356 static const char kWriteTimerHistogram[];
357 static const char kReadTimerHistogram[];
358 static const char kDownloadTimerHistogram[];
359 static const char kParsetimerHistogram[];
360 static const char kModelStatusHistogram[];
361 };
362
// The model loader instantiated for the translate ranker model.
363 typedef ModelLoader<TranslateRankerModelTraits> TranslateRankerModelLoader;
364
365 } // namespace translate
366
367 #endif // COMPONENTS_TRANSLATE_CORE_BROWSER_TRANSLATE_RANKER_MODEL_LOADER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698