Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1697)

Unified Diff: components/translate/core/browser/translate_ranker_model_loader.h

Issue 2565873002: [translate] Add translate ranker model loader. (Closed)
Patch Set: Initial CL Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/translate/core/browser/translate_ranker_model_loader.h
diff --git a/components/translate/core/browser/translate_ranker_model_loader.h b/components/translate/core/browser/translate_ranker_model_loader.h
new file mode 100644
index 0000000000000000000000000000000000000000..9d0d796ae2b95c33f8453c057809204d468cad5d
--- /dev/null
+++ b/components/translate/core/browser/translate_ranker_model_loader.h
@@ -0,0 +1,367 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_TRANSLATE_CORE_BROWSER_TRANSLATE_RANKER_MODEL_LOADER_H_
+#define COMPONENTS_TRANSLATE_CORE_BROWSER_TRANSLATE_RANKER_MODEL_LOADER_H_
+
+#include <memory>
+#include <string>
+
+#include "base/bind.h"
+#include "base/bind_helpers.h"
+#include "base/files/file_util.h"
+#include "base/files/important_file_writer.h"
+#include "base/gtest_prod_util.h"
+#include "base/memory/ptr_util.h"
+#include "base/memory/ref_counted.h"
+#include "base/metrics/histogram_macros.h"
+#include "base/profiler/scoped_tracker.h"
+#include "base/single_thread_task_runner.h"
+#include "base/strings/string_util.h"
+#include "base/synchronization/lock.h"
+#include "base/task_runner.h"
+#include "base/task_scheduler/post_task.h"
+#include "base/task_scheduler/task_traits.h"
+#include "components/translate/core/browser/proto/translate_ranker_model.pb.h"
+#include "components/translate/core/browser/translate_url_fetcher.h"
+
+namespace translate {
+
+// Generic, asynchronous loader for a model described by the traits type |T|.
+// It reads a cached copy of the model from disk, validates it with
+// caller-supplied callbacks, and downloads a replacement from a configured URL
+// when the cached copy is missing, unusable or out of date.
+//
+// |T| must provide a ModelType type, a kFetcherId constant, and the UMA
+// histogram name constants kModelStatusHistogram, kParsetimerHistogram,
+// kReadTimerHistogram, kWriteTimerHistogram and kDownloadTimerHistogram.
+template <typename T>
+class ModelLoader {
+ public:
+  using ModelType = typename T::ModelType;
gab 2016/12/19 21:00:46 using T::ModelType; should do the trick I think.
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Acknowledged.
+
+  // A callback used by the model loader to determine whether a loaded model
+  // is suitable/valid by some measure. May be called multiple times.
+  using IsValidFunc = base::RepeatingCallback<bool(const ModelType&)>;
+
+  // Called with a non-null model unique_ptr when the loader has successfully
+  // loaded a compatible model, or with a null unique_ptr if the loader has
+  // failed to load a model after exhausting its allotted retry attempts. May
+  // be called multiple times if the cached model is compatible but out of
+  // date: once when the compatible cached model becomes available, and once
+  // again after downloading and validating an up-to-date model.
+  using OnAvailableFunc =
+      base::RepeatingCallback<void(std::unique_ptr<ModelType>)>;
gab 2016/12/19 21:00:46 As of C++11, using OnAvailableCallback = base
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Acknowledged.
+
+  ModelLoader();
+  ~ModelLoader() = default;
gab 2016/12/19 21:00:46 Destructors should be defined out of line for clas
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Done.
+
+  // Sets the file path from which to load the cached model. This path will
+  // also be used to store a more up-to-date model if available. Returns
+  // |*this| so that setters can be chained.
+  ModelLoader& set_cache_file_path(const base::FilePath& cache_file_path) {
+    cache_file_path_ = cache_file_path;
+    return *this;
+  }
+
+  // Sets the URL from which to download the model. This URL will be used if
+  // there is no cached model or if the cached model is not up-to-date.
+  ModelLoader& set_download_url(const GURL& download_url) {
+    download_url_ = download_url;
+    return *this;
+  }
+
+  // Sets the callback that the model loader will use to validate that a given
+  // model is compatible with the caller's needs.
+  ModelLoader& set_is_compatible_func(IsValidFunc f) {
pasko 2016/12/19 14:26:49 why is it necessary to inject various functions li
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Moved this functionality to a ModelObserver interf
+    is_compatible_func_ = f;
+    return *this;
+  }
+
+  // Sets the callback that the model loader will use to validate that a given
+  // model is sufficiently up-to-date per the caller's expectations. For
+  // example, if the caller knows out-of-band the version of the most recent
+  // model. By implication, version information is expected to be embedded in
+  // the model.
+  ModelLoader& set_is_up_to_date_func(IsValidFunc f) {
+    is_up_to_date_func_ = f;
+    return *this;
+  }
+
+  // Sets the callback that the model loader will use to notify the caller that
+  // a compatible model is available.
+  ModelLoader& set_on_model_available_func(OnAvailableFunc callback) {
+    on_model_available_func_ = callback;
+    return *this;
+  }
+
+  // Asynchronously initiates loading the model from the cache file path and
+  // URL previously configured.
+  void Start();
+
+  // Call this method periodically to notify the downloader that translate is
+  // being used. This is used as a proxy notification for network activity.
+  // If a model download is pending, this will trigger (subject to retry and
+  // frequency limits) the download.
+  void NotifyOfTranslateEvent();
+
+ private:
+  // Enumeration denoting the outcome of an attempt to download the model. This
+  // must be kept in sync with the TranslateRankerModelStatus enum in
+  // histograms.xml
+  // TODO(rogerm): rename the enum in histograms.xml to be more generic
+  enum ModelStatus {
+    MODEL_STATUS_OK = 0,
+    MODEL_STATUS_DOWNLOAD_THROTTLED = 1,
+    MODEL_STATUS_DOWNLOAD_FAILED = 2,
+    MODEL_STATUS_PARSE_FAILED = 3,
+    MODEL_STATUS_VALIDATION_FAILED = 4,
+    // Insert new values above this line.
+    MODEL_STATUS_MAX
+  };
+
+  // The maximum number of model download attempts to make. Download may fail
+  // due to server error or network availability issues. Declared static so
+  // that the value is a class-level constant rather than a data member stored
+  // in every loader instance.
+  static constexpr int kMaxRetryOn5xx = 8;
+
+  // The minimum duration, in minutes, between download attempts.
+  static constexpr int kDownloadRefractoryPeriodMin = 3;
+
+  // Log the result of loading a model to UMA.
+  void ReportModelStatus(ModelStatus model_status);
+
+  // Called to construct a model from the given |data|. Returns null if |data|
+  // cannot be parsed or the parsed model is rejected by the compatibility
+  // callback.
+  std::unique_ptr<ModelType> Parse(const std::string& data);
+
+  // Reads the model from cache and/or kicks off a model download.
gab 2016/12/19 21:00:46 s/Task functor to read .../Reads.../ ?
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Done.
+  void LoadData();
+
+  // Called when the background task to download the model from |download_url_|
+  // has completed.
+  void OnDownloadComplete(int id, bool success, const std::string& data);
+
+  // Writes |data| to the model's cache file path.
+  void SaveData(const std::string& data);
+
+  // The prefix to prepend to all UMA metrics generated by this loader.
+  // NOTE(review): never assigned or read in this header - confirm whether it
+  // is still needed.
+  const std::string uma_prefix_;
+
+  // Used to protect the creation/destruction of the fetcher.
+  base::Lock lock_;
+
+  // The task runner with which to perform background IO to read, download and
+  // cache the model. This must be a SingleThreadTaskRunner due to legacy
+  // requirements of the URLFetcher.
fdoray 2016/12/19 15:18:29 // TODO(fdoray): Make this a SequencedTaskRunner o
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Acknowledged.
+  scoped_refptr<base::SingleThreadTaskRunner> task_runner_;
+
+  // Used to download model data from |download_url_|. Non-null only while a
+  // download is pending.
+  // TODO(rogerm): Use net::URLFetcher directly?
+  std::unique_ptr<TranslateURLFetcher> url_fetcher_;
+
+  // The earliest time at which the next download attempt may be made.
+  base::Time next_earliest_download_time_;
+
+  // Tracks the time of the last attempt to download a model. Used for UMA
+  // reporting of download duration.
+  base::Time download_start_time_;
+
+  // The path at which the model is (or should be) cached.
+  base::FilePath cache_file_path_;
+
+  // The URL from which to download the model if the model is not in the cache
+  // or the cached model is invalid/expired.
+  GURL download_url_;
+
+  // Callback used to check if a model is compatible with the caller.
gab 2016/12/19 21:00:46 nit: s/Functor/Callback/ (and below)
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Acknowledged.
+  IsValidFunc is_compatible_func_;
+
+  // Callback used to check if a model is up-to-date.
+  IsValidFunc is_up_to_date_func_;
+
+  // Callback used to notify the caller on the availability of a compatible
+  // model.
+  OnAvailableFunc on_model_available_func_;
+
+  DISALLOW_COPY_AND_ASSIGN(ModelLoader);
+};
+
+// Creates the single-thread task runner on which the loader performs its
+// background work. The runner is requested with BACKGROUND priority, is
+// skipped on shutdown, and is marked as one that may wait and do file IO.
+template <typename T>
+ModelLoader<T>::ModelLoader()
+    : task_runner_(base::CreateSingleThreadTaskRunnerWithTraits(
+          base::TaskTraits()
+              .WithPriority(base::TaskPriority::BACKGROUND)
+              .WithShutdownBehavior(
+                  base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN)
+              .WithWait()
+              .WithFileIO())) {
+}
+
+// Begins loading by posting LoadData() to |task_runner_|.
+// NOTE(review): |this| is bound unretained, so the loader presumably has to
+// outlive the posted task - confirm against the owner's lifetime.
+template <typename T>
+void ModelLoader<T>::Start() {
+  auto load_task =
+      base::Bind(&ModelLoader<T>::LoadData, base::Unretained(this));
pasko 2016/12/19 14:26:49 base::Unretained(this): what ensures that this mod
Roger McFarlane (Chromium) 2017/02/08 23:08:08 The traits of the task runner are to skip running
+  task_runner_->PostTask(FROM_HERE, load_task);
+}
+
+// Attempts to start (or retry) the pending model download, if there is one.
+// Does nothing when no download is pending, when a request is already in
+// flight, or when the earliest allowed time for the next attempt has not yet
+// been reached.
+template <typename T>
+void ModelLoader<T>::NotifyOfTranslateEvent() {
pasko 2016/12/19 14:26:49 please consider moving the implementation to the .
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Switched to a fixed ranker model proto instead of
+  // |url_fetcher_| is only inspected while holding |lock_|. An unsynchronized
+  // "fast path" null check ahead of the lock would be a data race with the
+  // locked resets of the fetcher below and in OnDownloadComplete().
+  base::AutoLock auto_lock(lock_);
+
+  // A null fetcher means that no download is pending.
+  if (!url_fetcher_)
+    return;
+
+  // If a request is already in flight, do not issue a new one.
+  if (url_fetcher_->state() == TranslateURLFetcher::REQUESTING) {
+    DVLOG(2) << "ModelLoader: Download is in progress.";
+    return;
+  }
+
+  // Do nothing if the download attempts should be throttled. The clock is read
+  // once so that the throttle check and the update below agree on "now".
+  const base::Time now = base::Time::NowFromSystemTime();
+  if (now < next_earliest_download_time_) {
+    DVLOG(2) << "TranslateRanker: Last download attempt was too recent.";
+    return;
+  }
+
+  DVLOG(2) << "Downloading model from: " << download_url_;
+
+  // Reset the time of the next earliest allowable download attempt.
+  next_earliest_download_time_ =
+      now + base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin);
gab 2016/12/19 21:00:46 Use TimeTicks to compute any time difference, time
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Done.
+
+  // Kick off the next download attempt. |download_start_time_| is read back in
+  // OnDownloadComplete() to report the download duration.
+  download_start_time_ = base::Time::Now();
+  const bool result = url_fetcher_->Request(
+      download_url_,
+      base::Bind(&ModelLoader<T>::OnDownloadComplete, base::Unretained(this)));
pasko 2016/12/19 14:26:49 what guarantees that this callback does not get ru
Roger McFarlane (Chromium) 2017/02/08 23:08:08 It's on the current threads task runner. Which is
+
+  // The maximum number of download attempts has been surpassed. Don't make
+  // any further attempts.
+  if (!result) {
+    DVLOG(2) << "Model download abandoned.";
+    ReportModelStatus(MODEL_STATUS_DOWNLOAD_FAILED);
+    url_fetcher_.reset();
+  }
+}
+
+// Records |model_status| as a sample in the enumeration histogram named by
+// T::kModelStatusHistogram, using MODEL_STATUS_MAX as the boundary value.
+template <typename T>
+void ModelLoader<T>::ReportModelStatus(ModelStatus model_status) {
+  UMA_HISTOGRAM_ENUMERATION(T::kModelStatusHistogram,
+                            model_status,
+                            MODEL_STATUS_MAX);
+}
+
+// Builds a model from the serialized |data| and reports the outcome to UMA.
+//
+// Returns the parsed model on success. Returns null if |data| cannot be
+// parsed (MODEL_STATUS_PARSE_FAILED) or if the parsed model is rejected by
+// |is_compatible_func_| (MODEL_STATUS_VALIDATION_FAILED). The time spent here
+// is recorded in the histogram named by T::kParsetimerHistogram.
+template <typename T>
+std::unique_ptr<typename T::ModelType> ModelLoader<T>::Parse(
+    const std::string& data) {
+  SCOPED_UMA_HISTOGRAM_TIMER(T::kParsetimerHistogram);
+
+  // Instantiate the model type named by the traits, not one specific proto,
+  // so that the loader works for every |T|.
+  auto model = base::MakeUnique<ModelType>();
+
+  if (!model->ParseFromString(data)) {
+    ReportModelStatus(MODEL_STATUS_PARSE_FAILED);
+    return nullptr;
+  }
+
+  if (!is_compatible_func_.Run(*model)) {
+    ReportModelStatus(MODEL_STATUS_VALIDATION_FAILED);
+    return nullptr;
+  }
+
+  ReportModelStatus(MODEL_STATUS_OK);
+  return model;
+}
+
+// Reads the cached model (if a cache path is configured) and hands any
+// compatible model to |on_model_available_func_|. If no up-to-date model was
+// obtained from the cache and a valid download URL is configured, creates the
+// fetcher and triggers a first download attempt.
+template <typename T>
+void ModelLoader<T>::LoadData() {
+  // Attempt to read the model data from the cache file.
+  std::string data;
+  if (!cache_file_path_.empty()) {
+    DVLOG(2) << "Loading model from: " << cache_file_path_.value();
+    SCOPED_UMA_HISTOGRAM_TIMER(T::kReadTimerHistogram);
+    if (!base::ReadFileToString(cache_file_path_, &data))
+      data.clear();
+  }
+
+  // If the model was successfully read and is compatible, then notify the
+  // "owner" of this model loader of the model's availability (transferring
+  // ownership of the model). If the model is, further, up to date, then there
+  // is no further work to be done.
+  if (!data.empty()) {
+    std::unique_ptr<ModelType> model = Parse(data);
+    if (model) {
+      // Query freshness before the model is moved out of this function.
+      const bool is_up_to_date = is_up_to_date_func_.Run(*model);
+      on_model_available_func_.Run(std::move(model));
gab 2016/12/19 21:00:46 From the API it wasn't obvious to me that this cal
Roger McFarlane (Chromium) 2017/02/08 23:08:08 I've switched the API to an observer model, where
+      if (is_up_to_date)
+        return;
+    }
+  }
+
+  // Reaching this point means that a model download is required. If there is
+  // no download URL configured, then there is nothing further to do.
+  if (!download_url_.is_valid())
+    return;
+
+  // Otherwise, initialize the model fetcher to be non-null and trigger an
+  // initial download attempt.
gab 2016/12/19 21:00:46 nit: itempt?
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Done.
+  {
+    // The fetcher's creation is guarded by |lock_|, matching the locked
+    // accesses in NotifyOfTranslateEvent() and OnDownloadComplete(). The lock
+    // is released before calling NotifyOfTranslateEvent(), which acquires it
+    // itself.
+    base::AutoLock auto_lock(lock_);
+    url_fetcher_.reset(new TranslateURLFetcher(T::kFetcherId));
+    url_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx);
+  }
+  NotifyOfTranslateEvent();
+}
+
+// Completion handler for a download started by NotifyOfTranslateEvent().
+// Records the download duration, then parses and publishes the downloaded
+// model. When that model is up to date it is also written to the cache file
+// (if one is configured) and the fetcher is released so that no further
+// downloads are attempted.
+template <typename T>
+void ModelLoader<T>::OnDownloadComplete(int /* id */,
+                                        bool success,
+                                        const std::string& data) {
+  UMA_HISTOGRAM_MEDIUM_TIMES(T::kDownloadTimerHistogram,
+                             base::Time::Now() - download_start_time_);
fdoray 2016/12/19 15:18:29 Use TimeTicks instead of Time to compute the the a
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Done.
+
+  // A failed download is simply dropped here; a later translate event will
+  // retry, and the TranslateURLFetcher bounds the number of retried requests.
+  if (!success)
+    return;
+
+  std::unique_ptr<ModelType> downloaded_model = Parse(data);
+  if (!downloaded_model)
+    return;
+
+  // Freshness has to be queried before ownership of the model is handed off.
+  const bool model_is_current = is_up_to_date_func_.Run(*downloaded_model);
+
+  // Publish the compatible model to the owner.
+  on_model_available_func_.Run(std::move(downloaded_model));
+
+  // A stale model leaves the download pending so that it can be retried.
+  if (!model_is_current)
+    return;
+
+  // The model is the most recent: cache it and discontinue download attempts.
+  if (!cache_file_path_.empty()) {
+    task_runner_->PostTask(
+        FROM_HERE,
+        base::Bind(&ModelLoader<T>::SaveData, base::Unretained(this), data));
+  }
+
+  base::AutoLock auto_lock(lock_);
+  url_fetcher_.reset();
+}
+
+// Atomically writes |data| to |cache_file_path_|, which must be non-empty.
+// The time taken is recorded in the histogram named by
+// T::kWriteTimerHistogram. Caching is best effort: a failed write is logged
+// and otherwise ignored.
+template <typename T>
+void ModelLoader<T>::SaveData(const std::string& data) {
+  DCHECK(!cache_file_path_.empty());
+  SCOPED_UMA_HISTOGRAM_TIMER(T::kWriteTimerHistogram);
+  if (!base::ImportantFileWriter::WriteFileAtomically(cache_file_path_,
+                                                      data)) {
+    DVLOG(2) << "Failed to write model to: " << cache_file_path_.value();
+  }
+}
+
+// Traits that bind ModelLoader to the translate ranker model: the model type
+// to load, the id handed to the URL fetcher, and the names of the UMA
+// histograms the loader records to. The constants are only declared here;
+// their values are defined elsewhere.
+class TranslateRankerModelTraits {
pasko 2016/12/19 14:26:49 Do you have plans to add more traits? If not, plea
Roger McFarlane (Chromium) 2017/02/08 23:08:08 Done.
+ public:
+  // The model type that the loader parses and hands to its owner.
+  using ModelType = chrome_intelligence::TranslateRankerModel;
+
+  // Id passed to the TranslateURLFetcher that the loader creates.
+  static const int kFetcherId;
+
+  // Names of the UMA histograms used by the loader.
+  static const char kWriteTimerHistogram[];
+  static const char kReadTimerHistogram[];
+  static const char kDownloadTimerHistogram[];
+  static const char kParsetimerHistogram[];
+  static const char kModelStatusHistogram[];
+};
+
+// The model loader specialized for the translate ranker model.
+using TranslateRankerModelLoader = ModelLoader<TranslateRankerModelTraits>;
+
+} // namespace translate
+
+#endif // COMPONENTS_TRANSLATE_CORE_BROWSER_TRANSLATE_RANKER_MODEL_LOADER_H_

Powered by Google App Engine
This is Rietveld 408576698