 Chromium Code Reviews
 Chromium Code Reviews Issue 2565873002:
  [translate] Add translate ranker model loader.  (Closed)
    
  
    Issue 2565873002:
  [translate] Add translate ranker model loader.  (Closed) 
  | OLD | NEW | 
|---|---|
| (Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #ifndef COMPONENTS_TRANSLATE_CORE_BROWSER_TRANSLATE_RANKER_MODEL_LOADER_H_ | |
| 6 #define COMPONENTS_TRANSLATE_CORE_BROWSER_TRANSLATE_RANKER_MODEL_LOADER_H_ | |
| 7 | |
| 8 #include <memory> | |
| 9 #include <string> | |
| 10 | |
| 11 #include "base/bind.h" | |
| 12 #include "base/bind_helpers.h" | |
| 13 #include "base/files/file_util.h" | |
| 14 #include "base/files/important_file_writer.h" | |
| 15 #include "base/gtest_prod_util.h" | |
| 16 #include "base/memory/ptr_util.h" | |
| 17 #include "base/memory/ref_counted.h" | |
| 18 #include "base/metrics/histogram_macros.h" | |
| 19 #include "base/profiler/scoped_tracker.h" | |
| 20 #include "base/single_thread_task_runner.h" | |
| 21 #include "base/strings/string_util.h" | |
| 22 #include "base/synchronization/lock.h" | |
| 23 #include "base/task_runner.h" | |
| 24 #include "base/task_scheduler/post_task.h" | |
| 25 #include "base/task_scheduler/task_traits.h" | |
| 26 #include "components/translate/core/browser/proto/translate_ranker_model.pb.h" | |
| 27 #include "components/translate/core/browser/translate_url_fetcher.h" | |
| 28 | |
| 29 namespace translate { | |
| 30 | |
| 31 // If enabled, downloads a translate ranker model and uses it to determine | |
| 32 // whether the user should be given a translation prompt or not. | |
| 33 template <typename T> | |
| 34 class ModelLoader { | |
| 35 public: | |
| 36 typedef typename T::ModelType ModelType; | |
| 
gab
2016/12/19 21:00:46
using T::ModelType;
should do the trick I think.
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
Acknowledged.
 | |
| 37 | |
| 38 // A callback used by the model loader to determine whether a loaded model | |
| 39 // is suitable/valid by some measure. May be called multiple times. | |
| 40 typedef base::RepeatingCallback<bool(const ModelType&)> IsValidFunc; | |
| 41 | |
| 42 // Called with a non-null model unique_ptr when the loader has successfully | |
| 43 // loaded a compatible model, or with a null unique_ptr if the loader has | |
| 44 // failed to load a model after exhausting its allotted retry attempts. May be | |
| 45 // called multiple times if the cached model is compatible but out of date: | |
| 46 // once when the compatible cached model becomes available, and once again | |
| 47 // after downloading and validating an up-to-date model. | |
| 48 typedef base::RepeatingCallback<void(std::unique_ptr<ModelType>)> | |
| 49 OnAvailableFunc; | |
| 
gab
2016/12/19 21:00:46
As of C++11,
using OnAvailableCallback =
    base
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
Acknowledged.
 | |
| 50 | |
| 51 ModelLoader(); | |
| 52 ~ModelLoader() = default; | |
| 
gab
2016/12/19 21:00:46
Destructors should be defined out of line for clas
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
Done.
 | |
| 53 | |
| 54 // Sets the file path from which to load the cached model. This path will also | |
| 55 // be used to store a more up-to-date model if available. | |
| 56 ModelLoader& set_cache_file_path(const base::FilePath& cache_file_path) { | |
| 57 cache_file_path_ = cache_file_path; | |
| 58 return *this; | |
| 59 } | |
| 60 | |
| 61 // Sets the URL from which to download the model. This URL will be used if | |
| 62 // there is no cached model or if the cached model is not up-to-date. | |
| 63 ModelLoader& set_download_url(const GURL& download_url) { | |
| 64 download_url_ = download_url; | |
| 65 return *this; | |
| 66 } | |
| 67 | |
| 68 // Sets the callback that the model loader will use to validate that a given | |
| 69 // model is compatible with the caller's needs. | |
| 70 ModelLoader& set_is_compatible_func(IsValidFunc f) { | |
| 
pasko
2016/12/19 14:26:49
why is it necessary to inject various functions li
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
Moved this functionality to a ModelObserver interf
 | |
| 71 is_compatible_func_ = f; | |
| 72 return *this; | |
| 73 } | |
| 74 | |
| 75 // Sets the callback that the model loader will use to validate that a given | |
| 76 // model is sufficiently up-to-date per the caller's expectations. For example, | |
| 77 // if the caller knows out-of-band the version of the most recent model. By | |
| 78 // implication, version information is expected to be embedded in the model. | |
| 79 ModelLoader& set_is_up_to_date_func(IsValidFunc f) { | |
| 80 is_up_to_date_func_ = f; | |
| 81 return *this; | |
| 82 } | |
| 83 | |
| 84 // Sets the callback that the model loader will use to notify the caller that | |
| 85 // a compatible model is available. | |
| 86 ModelLoader& set_on_model_available_func(OnAvailableFunc callback) { | |
| 87 on_model_available_func_ = callback; | |
| 88 return *this; | |
| 89 } | |
| 90 | |
| 91 // Asynchronously initiates loading the model from the cache file path and URL | |
| 92 // previously configured. | |
| 93 void Start(); | |
| 94 | |
| 95 // Call this method periodically to notify the downloader that translate is | |
| 96 // being used. This is used as a proxy notification for network activity. | |
| 97 // If a model download is pending, this will trigger (subject to retry and | |
| 98 // frequency limits) the download. | |
| 99 void NotifyOfTranslateEvent(); | |
| 100 | |
| 101 private: | |
| 102 // Enumeration denoting the outcome of an attempt to download the model. This | |
| 103 // must be kept in sync with the TranslateRankerModelStatus enum in | |
| 104 // histograms.xml | |
| 105 // TODO(rogerm): rename the enum in histograms.xml to be more generic | |
| 106 enum ModelStatus { | |
| 107 MODEL_STATUS_OK = 0, | |
| 108 MODEL_STATUS_DOWNLOAD_THROTTLED = 1, | |
| 109 MODEL_STATUS_DOWNLOAD_FAILED = 2, | |
| 110 MODEL_STATUS_PARSE_FAILED = 3, | |
| 111 MODEL_STATUS_VALIDATION_FAILED = 4, | |
| 112 // Insert new values above this line. | |
| 113 MODEL_STATUS_MAX | |
| 114 }; | |
| 115 | |
| 116 // The maximum number of model download attempts to make. Download may fail | |
| 117 // due to server error or network availability issues. | |
| 118 const int kMaxRetryOn5xx = 8; | |
| 119 | |
| 120 // The minimum duration, in minutes, between download attempts. | |
| 121 const int kDownloadRefractoryPeriodMin = 3; | |
| 122 | |
| 123 // Log the result of loading a model to UMA. | |
| 124 void ReportModelStatus(ModelStatus model_status); | |
| 125 | |
| 126 // Called to construct a model from the given |data|. | |
| 127 std::unique_ptr<ModelType> Parse(const std::string& data); | |
| 128 | |
| 129 // Task functor to read the model from cache and/or kick off a model download. | |
| 
gab
2016/12/19 21:00:46
s/Task functor to read .../Reads.../ ?
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
Done.
 | |
| 130 void LoadData(); | |
| 131 | |
| 132 // Called when the background task to download the model from |download_url_| | |
| 133 // has completed. | |
| 134 void OnDownloadComplete(int id, bool success, const std::string& data); | |
| 135 | |
| 136 // Task functor to write |data| to the model's cache file path. | |
| 137 void SaveData(const std::string& data); | |
| 138 | |
| 139 // The prefix to prepend to all UMA metrics generated by this loader. | |
| 140 const std::string uma_prefix_; | |
| 141 | |
| 142 // Used to protect the creation/destruction of the fetcher. | |
| 143 base::Lock lock_; | |
| 144 | |
| 145 // The task runner with which to perform background IO to read, download and | |
| 146 // cache the model. This must be a SingleThreadTaskRunner due to legacy | |
| 147 // requirements of the URLFetcher. | |
| 
fdoray
2016/12/19 15:18:29
// TODO(fdoray): Make this a SequencedTaskRunner o
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
Acknowledged.
 | |
| 148 scoped_refptr<base::SingleThreadTaskRunner> task_runner_; | |
| 149 | |
| 150 // Used to download model data from |download_url_|. | |
| 151 // TODO(rogerm): Use net::URLFetcher directly? | |
| 152 std::unique_ptr<TranslateURLFetcher> url_fetcher_; | |
| 153 | |
| 154 // The earliest time at which the next attempt to download the model may be | |
| 155 // made. | |
| 156 base::Time next_earliest_download_time_; | |
| 157 | |
| 158 // Tracks the start time of the last attempt to download a model. Used for UMA | |
| 159 // reporting of download duration. | |
| 160 base::Time download_start_time_; | |
| 161 | |
| 162 // The path at which the model is (or should be) cached. | |
| 163 base::FilePath cache_file_path_; | |
| 164 | |
| 165 // The URL from which to download the model if the model is not in the cache | |
| 166 // or the cached model is invalid/expired. | |
| 167 GURL download_url_; | |
| 168 | |
| 169 // Functor used to check if a model is compatible with the caller. | |
| 
gab
2016/12/19 21:00:46
nit: s/Functor/Callback/ 
(and below)
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
Acknowledged.
 | |
| 170 IsValidFunc is_compatible_func_; | |
| 171 | |
| 172 // Functor used to check if a model is up-to-date. | |
| 173 IsValidFunc is_up_to_date_func_; | |
| 174 | |
| 175 // Functor used to notify the caller on the availability of a compatible | |
| 176 // model. | |
| 177 OnAvailableFunc on_model_available_func_; | |
| 178 | |
| 179 DISALLOW_COPY_AND_ASSIGN(ModelLoader); | |
| 180 }; | |
| 181 | |
| 182 template <typename T> | |
| 183 ModelLoader<T>::ModelLoader() | |
| 184 : task_runner_(base::CreateSingleThreadTaskRunnerWithTraits( | |
| 185 base::TaskTraits() | |
| 186 .WithPriority(base::TaskPriority::BACKGROUND) | |
| 187 .WithShutdownBehavior( | |
| 188 base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN) | |
| 189 .WithWait() | |
| 190 .WithFileIO())) {} | |
| 191 | |
| 192 template <typename T> | |
| 193 void ModelLoader<T>::Start() { | |
| 194 task_runner_->PostTask( | |
| 195 FROM_HERE, base::Bind(&ModelLoader<T>::LoadData, base::Unretained(this))); | |
| 
pasko
2016/12/19 14:26:49
base::Unretained(this): what ensures that this mod
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
The traits of the task runner are to skip running
 | |
| 196 } | |
| 197 | |
| 198 template <typename T> | |
| 199 void ModelLoader<T>::NotifyOfTranslateEvent() { | |
| 
pasko
2016/12/19 14:26:49
please consider moving the implementation to the .
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
Switched to a fixed ranker model proto instead of
 | |
| 200 // Immediate exit if no download is pending. | |
| 201 if (!url_fetcher_) | |
| 202 return; | |
| 203 | |
| 204 // Serialize calls to the rest of this method. | |
| 205 base::AutoLock auto_lock(lock_); | |
| 206 | |
| 207 // Validate that a download is still pending. | |
| 208 if (!url_fetcher_) | |
| 209 return; | |
| 210 | |
| 211 // If a request is already in flight, do not issue a new one. | |
| 212 if (url_fetcher_->state() == TranslateURLFetcher::REQUESTING) { | |
| 213 DVLOG(2) << "ModelLoader: Download is in progress."; | |
| 214 return; | |
| 215 } | |
| 216 // Do nothing if the download attempts should be throttled. | |
| 217 if (base::Time::NowFromSystemTime() < next_earliest_download_time_) { | |
| 218 DVLOG(2) << "TranslateRanker: Last download attempt was too recent."; | |
| 219 return; | |
| 220 } | |
| 221 | |
| 222 DVLOG(2) << "Downloading model from: " << download_url_; | |
| 223 | |
| 224 // Reset the time of the next earliest allowable download attempt. | |
| 225 next_earliest_download_time_ = | |
| 226 base::Time::NowFromSystemTime() + | |
| 227 base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin); | |
| 
gab
2016/12/19 21:00:46
Use TimeTicks to compute any time difference, time
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
Done.
 | |
| 228 | |
| 229 // Kick off the next download attempt. | |
| 230 download_start_time_ = base::Time::Now(); | |
| 231 bool result = url_fetcher_->Request( | |
| 232 download_url_, | |
| 233 base::Bind(&ModelLoader<T>::OnDownloadComplete, base::Unretained(this))); | |
| 
pasko
2016/12/19 14:26:49
what guarantees that this callback does not get ru
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
It's on the current threads task runner. Which is
 | |
| 234 | |
| 235 // The maximum number of download attempts has been surpassed. Don't make | |
| 236 // any further attempts. | |
| 237 if (!result) { | |
| 238 DVLOG(2) << "Model download abandoned."; | |
| 239 ReportModelStatus(MODEL_STATUS_DOWNLOAD_FAILED); | |
| 240 url_fetcher_.reset(); | |
| 241 } | |
| 242 } | |
| 243 | |
| 244 template <typename T> | |
| 245 void ModelLoader<T>::ReportModelStatus( | |
| 246 typename ModelLoader<T>::ModelStatus model_status) { | |
| 247 UMA_HISTOGRAM_ENUMERATION(T::kModelStatusHistogram, model_status, | |
| 248 MODEL_STATUS_MAX); | |
| 249 } | |
| 250 | |
| 251 template <typename T> | |
| 252 std::unique_ptr<typename T::ModelType> ModelLoader<T>::Parse( | |
| 253 const std::string& data) { | |
| 254 SCOPED_UMA_HISTOGRAM_TIMER(T::kParsetimerHistogram); | |
| 255 | |
| 256 auto model = base::MakeUnique<chrome_intelligence::TranslateRankerModel>(); | |
| 257 | |
| 258 if (!model->ParseFromString(data)) { | |
| 259 ReportModelStatus(MODEL_STATUS_PARSE_FAILED); | |
| 260 return nullptr; | |
| 261 } | |
| 262 | |
| 263 if (!is_compatible_func_.Run(*model)) { | |
| 264 ReportModelStatus(MODEL_STATUS_VALIDATION_FAILED); | |
| 265 return nullptr; | |
| 266 } | |
| 267 | |
| 268 ReportModelStatus(MODEL_STATUS_OK); | |
| 269 return model; | |
| 270 } | |
| 271 | |
| 272 template <typename T> | |
| 273 void ModelLoader<T>::LoadData() { | |
| 274 // Attempt to read the model data from the cache file. | |
| 275 std::string data; | |
| 276 if (!cache_file_path_.empty()) { | |
| 277 DVLOG(2) << "Loading model from: " << cache_file_path_.value(); | |
| 278 SCOPED_UMA_HISTOGRAM_TIMER(T::kReadTimerHistogram); | |
| 279 if (!base::ReadFileToString(cache_file_path_, &data)) | |
| 280 data.clear(); | |
| 281 } | |
| 282 | |
| 283 // If the model was successfully read and is compatible, then notify | |
| 284 // the "owner" of this model loader of the model's availability (transferring | |
| 285 // ownership of the model). If the model is, further, up to date, then there | |
| 286 // is no further work to be done. | |
| 287 if (!data.empty()) { | |
| 288 std::unique_ptr<ModelType> model = Parse(data); | |
| 289 if (model) { | |
| 290 bool is_up_to_date = is_up_to_date_func_.Run(*model); | |
| 291 on_model_available_func_.Run(std::move(model)); | |
| 
gab
2016/12/19 21:00:46
From the API it wasn't obvious to me that this cal
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
I've switched the API to an observer model, where
 | |
| 292 if (is_up_to_date) | |
| 293 return; | |
| 294 } | |
| 295 } | |
| 296 | |
| 297 // Reaching this point means that a model download is required. If there is | |
| 298 // no download URL configured, then there is nothing further to do. | |
| 299 if (!download_url_.is_valid()) | |
| 300 return; | |
| 301 | |
| 302 // Otherwise, initialize the model fetcher to be non-null and trigger an | |
| 303 // initial download itempt. | |
| 
gab
2016/12/19 21:00:46
nit: itempt?
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
Done.
 | |
| 304 url_fetcher_.reset(new TranslateURLFetcher(T::kFetcherId)); | |
| 305 url_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx); | |
| 306 NotifyOfTranslateEvent(); | |
| 307 } | |
| 308 | |
| 309 template <typename T> | |
| 310 void ModelLoader<T>::OnDownloadComplete(int /* id */, | |
| 311 bool success, | |
| 312 const std::string& data) { | |
| 313 UMA_HISTOGRAM_MEDIUM_TIMES(T::kDownloadTimerHistogram, | |
| 314 base::Time::Now() - download_start_time_); | |
| 
fdoray
2016/12/19 15:18:29
Use TimeTicks instead of Time to compute the the a
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
Done.
 | |
| 315 | |
| 316 // On failure, we just abort. The TranslateRanker will retry on a subsequent | |
| 317 // translation opportunity. The TranslateURLFetcher enforces a limit for | |
| 318 // retried requests. | |
| 319 if (!success) | |
| 320 return; | |
| 321 | |
| 322 auto model = Parse(data); | |
| 323 if (!model) | |
| 324 return; | |
| 325 | |
| 326 // Do we have the most recent model? Check now, before transferring ownership | |
| 327 // of the model away from this method. | |
| 328 bool is_up_to_date = is_up_to_date_func_.Run(*model); | |
| 329 | |
| 330 // Notify the owner that a compatible model is available. | |
| 331 on_model_available_func_.Run(std::move(model)); | |
| 332 | |
| 333 // If the model is the most recent, cache it and discontinue download attempts. | |
| 334 if (is_up_to_date) { | |
| 335 if (!cache_file_path_.empty()) { | |
| 336 task_runner_->PostTask( | |
| 337 FROM_HERE, | |
| 338 base::Bind(&ModelLoader<T>::SaveData, base::Unretained(this), data)); | |
| 339 } | |
| 340 base::AutoLock auto_lock(lock_); | |
| 341 url_fetcher_.reset(); | |
| 342 } | |
| 343 } | |
| 344 | |
| 345 template <typename T> | |
| 346 void ModelLoader<T>::SaveData(const std::string& data) { | |
| 347 DCHECK(!cache_file_path_.empty()); | |
| 348 SCOPED_UMA_HISTOGRAM_TIMER(T::kWriteTimerHistogram); | |
| 349 base::ImportantFileWriter::WriteFileAtomically(cache_file_path_, data); | |
| 350 } | |
| 351 | |
| 352 class TranslateRankerModelTraits { | |
| 
pasko
2016/12/19 14:26:49
Do you have plans to add more traits? If not, plea
 
Roger McFarlane (Chromium)
2017/02/08 23:08:08
Done.
 | |
| 353 public: | |
| 354 typedef typename chrome_intelligence::TranslateRankerModel ModelType; | |
| 355 static const int kFetcherId; | |
| 356 static const char kWriteTimerHistogram[]; | |
| 357 static const char kReadTimerHistogram[]; | |
| 358 static const char kDownloadTimerHistogram[]; | |
| 359 static const char kParsetimerHistogram[]; | |
| 360 static const char kModelStatusHistogram[]; | |
| 361 }; | |
| 362 | |
| 363 typedef ModelLoader<TranslateRankerModelTraits> TranslateRankerModelLoader; | |
| 364 | |
| 365 } // namespace translate | |
| 366 | |
| 367 #endif // COMPONENTS_TRANSLATE_CORE_BROWSER_TRANSLATE_RANKER_MODEL_LOADER_H_ | |
| OLD | NEW |