Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(20)

Unified Diff: components/translate/core/browser/ranker_model_loader.cc

Issue 2565873002: [translate] Add translate ranker model loader. (Closed)
Patch Set: for asan testing only - DO NOT COMMIT Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/translate/core/browser/ranker_model_loader.cc
diff --git a/components/translate/core/browser/ranker_model_loader.cc b/components/translate/core/browser/ranker_model_loader.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f969fea762e29301d928fe281e1b9daed6ba6f74
--- /dev/null
+++ b/components/translate/core/browser/ranker_model_loader.cc
@@ -0,0 +1,381 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/translate/core/browser/ranker_model_loader.h"
+
+#include "base/bind.h"
+#include "base/bind_helpers.h"
+#include "base/command_line.h"
+#include "base/files/file_path.h"
+#include "base/files/file_util.h"
+#include "base/files/important_file_writer.h"
+#include "base/macros.h"
+#include "base/memory/ptr_util.h"
+#include "base/memory/ref_counted.h"
+#include "base/metrics/histogram_macros.h"
+#include "base/profiler/scoped_tracker.h"
+#include "base/strings/string_util.h"
+#include "base/task_scheduler/post_task.h"
+#include "base/threading/sequenced_task_runner_handle.h"
+#include "components/translate/core/browser/proto/ranker_model.pb.h"
+#include "components/translate/core/browser/translate_url_fetcher.h"
+#include "components/translate/core/common/translate_switches.h"
+#include "components/variations/variations_associated_data.h"
+
+namespace translate {
+
+namespace {
+
+const int kUrlFetcherId = 2;
groby-ooo-7-16 2017/02/14 20:56:47 Why do we hardcode a fetcher ID? I've only seen th
fdoray 2017/02/15 20:07:40 constexpr here and below
Roger McFarlane (Chromium) 2017/02/21 22:16:49 I followed the other examples of the TranslateURLF
Roger McFarlane (Chromium) 2017/02/21 22:16:49 Done.
+
+// The maximum number of model download attempts to make. Download may fail
+// due to server error or network availability issues.
+const int kMaxRetryOn5xx = 8;
+
+// The minimum duration, in minutes, between download attempts.
+const int kDownloadRefractoryPeriodMin = 3;
groby-ooo-7-16 2017/02/14 20:56:47 1) since this is a duration, I'd love for this to
Roger McFarlane (Chromium) 2017/02/21 22:16:48 I'll make this an input param (via an options clas
+
+// Suffixes for the various histograms produced by the backend.
+const char kWriteTimerHistogram[] = ".Timer.WriteModel";
+const char kReadTimerHistogram[] = ".Timer.ReadModel";
+const char kDownloadTimerHistogram[] = ".Timer.DownloadModel";
+const char kParsetimerHistogram[] = ".Timer.ParseModel";
+const char kModelStatusHistogram[] = ".Model.Status";
+
+// A helper class to produce a scoped timer histogram that supports using a
+// non-static-const name.
+class MyScopedHistogramTimer {
+ public:
+ MyScopedHistogramTimer(const base::StringPiece& name)
groby-ooo-7-16 2017/02/14 20:56:48 FWIW: I take it the UMA peeps are OK with dynamica
Roger McFarlane (Chromium) 2017/02/21 22:16:50 I got this recipe from the UMA folks, yes. I'll a
+ : name_(name.begin(), name.end()), start_(base::TimeTicks::Now()) {}
+
+ ~MyScopedHistogramTimer() {
+ base::TimeDelta duration = base::TimeTicks::Now() - start_;
+ base::HistogramBase* counter = base::Histogram::FactoryTimeGet(
+ name_, base::TimeDelta::FromMilliseconds(10),
+ base::TimeDelta::FromMilliseconds(200000), 100,
+ base::HistogramBase::kUmaTargetedHistogramFlag);
+ if (counter)
+ counter->AddTime(duration);
+ }
+
+ private:
+ const std::string name_;
+ const base::TimeTicks start_;
+
+ DISALLOW_COPY_AND_ASSIGN(MyScopedHistogramTimer);
+};
+
+bool IsExpired(const chrome_intelligence::RankerModel& model) {
groby-ooo-7-16 2017/02/14 20:56:47 Should this be a member on the model?
Roger McFarlane (Chromium) 2017/02/21 22:16:49 Done.
+ DCHECK(model.has_metadata());
+ const auto& metadata = model.metadata();
+
+ // If the age of the model cannot be determined, presume it to be expired.
+ if (!metadata.has_last_modified_sec())
+ return true;
+
+ // If the model has no set cache duration, then it never expires.
+ if (!metadata.has_cache_duration_sec() || metadata.cache_duration_sec() == 0)
+ return false;
+
+ // Otherwise, a model is expired if its age exceeds the cache duration.
+ base::Time last_modified =
+ base::Time() + base::TimeDelta::FromSeconds(metadata.last_modified_sec());
+ base::TimeDelta age = base::Time::Now() - last_modified;
+ return age > base::TimeDelta::FromSeconds(metadata.cache_duration_sec());
+}
+
+} // namespace
+
+class RankerModelLoader::Backend {
+ public:
+ Backend(const ValidateCallback& validate_callback,
+ const OnModelAvailableCallback& on_model_available_callback,
+ const base::FilePath& model_path,
+ const GURL& model_url,
+ const std::string& uma_prefix);
+ ~Backend();
+
+ // Reads the model from |model_path_|.
+ void LoadFromFile();
+
+ // Reads the model |model_url_|.
+ void LoadFromURL();
+
+ private:
+ // Log the result of loading a model to UMA.
+ void ReportModelStatus(RankerModelStatus model_status);
+
+ // Called to construct a model from the given |data|.
groby-ooo-7-16 2017/02/14 20:56:48 nit: Please don't use passive voice: "Constructs a
Roger McFarlane (Chromium) 2017/02/21 22:16:49 Done.
+ std::unique_ptr<chrome_intelligence::RankerModel> Parse(
+ const std::string& data);
+
+ // Called the download initiated by LoadFromURL() completes.
groby-ooo-7-16 2017/02/14 20:56:47 Would you mind documenting |id|?
Roger McFarlane (Chromium) 2017/02/21 22:16:49 Done.
+ void OnDownloadComplete(int id, bool success, const std::string& data);
+
+ // Transfer ownership of |model| to the |observer_|.
hamelphi 2017/02/13 19:18:58 |observer_| is not defined in this class. Do you m
Roger McFarlane (Chromium) 2017/02/21 22:16:49 I updated the comment and method names to refer to
+ void TransferModelToObserver(
+ std::unique_ptr<chrome_intelligence::RankerModel> model);
+
+ // Validates that ranker model loader tasks are all performed on the correct
+ // sequence.
+ base::SequenceChecker sequence_checker_;
+
+ // The TaskRunner on which this was constructed.
groby-ooo-7-16 2017/02/14 20:56:48 nit: "On which |this| was constructed"
Roger McFarlane (Chromium) 2017/02/21 22:16:49 Done.
+ const scoped_refptr<base::SequencedTaskRunner> origin_task_runner_;
+
+ // Validates a ranker model on behalf of the model loader client. This may
+ // be called on any sequence and must, therefore, be thread-safe.
+ const ValidateCallback validate_callback_;
+
+ // Transfers ownership of a loaded model back to the model loader client.
+ // This will be called on the sequence on which the model loader was
+ // constructed.
+ const OnModelAvailableCallback on_model_available_callback_;
+
+ // The path at which the model is (or should be) cached.
+ const base::FilePath model_path_;
+
+ // The URL from which to download the model if the model is not in the cache
+ // or the cached model is invalid/expired.
+ const GURL model_url_;
+
+ // This will prefix all UMA metrics generated by the model loader.
+ const std::string uma_prefix_;
+
+ // Used to download model data from |model_url_|.
+ // TODO(rogerm): Use net::URLFetcher directly?
+ std::unique_ptr<TranslateURLFetcher> url_fetcher_;
+
+ // The next time before which no new attempts to download the model should be
+ // attempted.
+ base::TimeTicks next_earliest_download_time_;
groby-ooo-7-16 2017/02/14 20:56:47 Why not just use a timer to schedule the next atte
Roger McFarlane (Chromium) 2017/02/21 22:16:49 I'm actually using this a throttle, not a deadline
+
+ // Tracks the last time of the last attempt to download a model. Used for UMA
+ // reporting of download duration.
+ base::TimeTicks download_start_time_;
+
+ DISALLOW_COPY_AND_ASSIGN(Backend);
+};
+
+RankerModelLoader::RankerModelLoader(
+ const ValidateCallback& validate_callback,
+ const OnModelAvailableCallback& on_model_available_callback,
+ const base::FilePath& model_path,
+ const GURL& model_url,
+ const std::string& uma_prefix)
+ : backend_task_runner_(base::CreateSequencedTaskRunnerWithTraits(
+ base::TaskTraits().MayBlock())),
fdoray 2017/02/15 20:07:40 Backend doesn't access memory that could be freed
Roger McFarlane (Chromium) 2017/02/21 22:16:50 Done. The backend forwards downloaded models to t
+ backend_(base::MakeUnique<Backend>(validate_callback,
+ on_model_available_callback,
+ model_path,
+ model_url,
+ uma_prefix)) {}
+
+RankerModelLoader::~RankerModelLoader() {
+ // This is guaranteed to be sequenced after any pending backend operation.
+ backend_task_runner_->DeleteSoon(FROM_HERE, backend_.release());
+}
+
+void RankerModelLoader::Start() {
+ backend_task_runner_->PostTask(
+ FROM_HERE, base::Bind(&RankerModelLoader::Backend::LoadFromFile,
+ base::Unretained(backend_.get())));
+}
+
+void RankerModelLoader::NotifyOfNetworkAvailability() {
+ backend_task_runner_->PostTask(
+ FROM_HERE, base::Bind(&RankerModelLoader::Backend::LoadFromURL,
+ base::Unretained(backend_.get())));
+}
+
groby-ooo-7-16 2017/02/14 20:56:48 Maybe at least a double line separator to make it
Roger McFarlane (Chromium) 2017/02/21 22:16:48 Done.
+RankerModelLoader::Backend::Backend(
+ const ValidateCallback& validate_callback,
+ const OnModelAvailableCallback& on_model_available_callback,
+ const base::FilePath& model_path,
+ const GURL& model_url,
+ const std::string& uma_prefix)
+ : origin_task_runner_(base::SequencedTaskRunnerHandle::Get()),
+ validate_callback_(validate_callback),
+ on_model_available_callback_(on_model_available_callback),
+ model_path_(model_path),
+ model_url_(model_url),
+ uma_prefix_(uma_prefix) {
+ sequence_checker_.DetachFromSequence();
+}
+
+RankerModelLoader::Backend::~Backend() = default;
groby-ooo-7-16 2017/02/14 20:56:47 curious - why RankerModelLoader::Backend::~B
Roger McFarlane (Chromium) 2017/02/21 22:16:48 they're equivalent, so no particular reason.
+
+void RankerModelLoader::Backend::ReportModelStatus(
+ RankerModelStatus model_status) {
+ base::HistogramBase* histogram = base::LinearHistogram::FactoryGet(
+ uma_prefix_ + kModelStatusHistogram, 1,
+ static_cast<int>(RankerModelStatus::MAX),
+ static_cast<int>(RankerModelStatus::MAX) + 1,
+ base::HistogramBase::kUmaTargetedHistogramFlag);
+ if (histogram)
+ histogram->Add(static_cast<int>(model_status));
+}
+
+std::unique_ptr<chrome_intelligence::RankerModel>
+RankerModelLoader::Backend::Parse(const std::string& data) {
+ DCHECK(sequence_checker_.CalledOnValidSequence());
+ MyScopedHistogramTimer timer(uma_prefix_ + kParsetimerHistogram);
+
+ auto model = base::MakeUnique<chrome_intelligence::RankerModel>();
+
+ if (!model->ParseFromString(data)) {
+ ReportModelStatus(RankerModelStatus::PARSE_FAILED);
+ return nullptr;
+ }
+
+ RankerModelStatus model_status = validate_callback_.Run(*model);
groby-ooo-7-16 2017/02/14 20:56:48 Thought: If the callback took a ptr, and you had a
Roger McFarlane (Chromium) 2017/02/21 22:16:49 Not quite. I might have a non-null, but invalid mo
+ ReportModelStatus(model_status);
+ if (model_status != RankerModelStatus::OK)
+ return nullptr;
+
+ return model;
+}
+
+void RankerModelLoader::Backend::LoadFromFile() {
+ DCHECK(sequence_checker_.CalledOnValidSequence());
+ // Attempt to read the model data from the cache file.
+ std::string data;
+ if (!model_path_.empty()) {
+ DVLOG(2) << "Attempting to load model from: " << model_path_.value();
+ MyScopedHistogramTimer timer(uma_prefix_ + kReadTimerHistogram);
+ if (!base::ReadFileToString(model_path_, &data)) {
+ DVLOG(2) << "Failed to read model from: " << model_path_.value();
+ data.clear();
+ }
+ }
+
+ // If the model was successfully was read and is compatible, then notify
+ // the "owner" of this model loader of the models availability (transferring
+ // ownership of the model). If the model originated at the model_url_ and has
+ // not expired then there is no need to download a new one.
+ if (!data.empty()) {
+ std::unique_ptr<chrome_intelligence::RankerModel> model = Parse(data);
+ if (model) {
+ std::string url_spec = model->metadata().source();
+ bool is_expired = IsExpired(*model);
+ DVLOG(2) << (is_expired ? "Expired m" : "M") << "odel in '"
+ << model_path_.value() << "'' was originally downloaded from '"
+ << url_spec << "'.";
+
+ TransferModelToObserver(std::move(model));
+
+ if (url_spec == model_url_.spec() && !is_expired)
hamelphi 2017/02/13 19:18:58 Maybe comment on what it means if either condition
Roger McFarlane (Chromium) 2017/02/21 22:16:49 Done.
+ return;
+ }
+ }
+
+ // Reaching this point means that a model download is required. If there is
+ // no download URL configured, then there is nothing further to do.
+ if (!model_url_.is_valid())
+ return;
+
+ // Otherwise, initialize the model fetcher to be non-null and trigger an
+ // initial download attempt.
+ url_fetcher_ = base::MakeUnique<TranslateURLFetcher>(kUrlFetcherId);
+ url_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx);
+ LoadFromURL();
groby-ooo-7-16 2017/02/14 20:56:48 FWIW, LoadFromURL and LoadFromFile have slightly a
Roger McFarlane (Chromium) 2017/02/21 22:16:50 I made them a bit more symmetric. url_fetcher_ ==
+}
+
+void RankerModelLoader::Backend::LoadFromURL() {
+ DCHECK(sequence_checker_.CalledOnValidSequence());
+ // Immediately exit if there is no download pending.
+ if (!url_fetcher_)
+ return;
+
+ // If a request is already in flight, do not issue a new one.
groby-ooo-7-16 2017/02/14 20:56:47 IIUC, we only need this because we're currently us
Roger McFarlane (Chromium) 2017/02/21 22:16:49 By transition-based approach, you mean watching th
+ if (url_fetcher_->state() == TranslateURLFetcher::REQUESTING) {
+ DVLOG(2) << "RankerModelLoader: Download is in progress.";
+ return;
+ }
+ // Do nothing if the download attempts should be throttled.
+ if (base::TimeTicks::Now() < next_earliest_download_time_) {
+ DVLOG(2) << "ModelLoadser: Last download attempt was too recent.";
hamelphi 2017/02/13 19:18:58 s\ModelLoadser\ModelLoader
Roger McFarlane (Chromium) 2017/02/21 22:16:50 Done.
+ return;
+ }
+
+ DVLOG(2) << "Downloading model from: " << model_url_;
+
+ // Reset the time of the next earliest allowable download attempt.
+ next_earliest_download_time_ =
+ base::TimeTicks::Now() +
+ base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin);
+
+ // Kick off the next download attempt.
+ download_start_time_ = base::TimeTicks::Now();
+ bool result = url_fetcher_->Request(
+ model_url_, base::Bind(&RankerModelLoader::Backend::OnDownloadComplete,
+ base::Unretained(this)));
+
+ // The maximum number of download attempts has been surpassed. Don't make
+ // any further attempts.
+ if (!result) {
+ DVLOG(2) << "Model download abandoned.";
+ ReportModelStatus(RankerModelStatus::DOWNLOAD_FAILED);
+ url_fetcher_.reset();
+ }
+}
+
+void RankerModelLoader::Backend::OnDownloadComplete(int /* id */,
+ bool success,
+ const std::string& data) {
+ DCHECK(sequence_checker_.CalledOnValidSequence());
+
+ // Record the duration of the download.
+ base::TimeDelta duration = base::TimeTicks::Now() - download_start_time_;
+ base::HistogramBase* counter = base::Histogram::FactoryTimeGet(
+ uma_prefix_ + kDownloadTimerHistogram,
+ base::TimeDelta::FromMilliseconds(10),
+ base::TimeDelta::FromMilliseconds(200000), 100,
+ base::HistogramBase::kUmaTargetedHistogramFlag);
+ if (counter)
+ counter->AddTime(duration);
+
+ // On failure, we just abort. The TranslateRanker will retry on a subsequent
+ // translation opportunity. The TranslateURLFetcher enforces a limit for
groby-ooo-7-16 2017/02/14 20:56:48 Should we instead retry once network is available?
Roger McFarlane (Chromium) 2017/02/21 22:16:49 See my previous comments/thoughts/questions about
+ // retried requests.
+ if (!success || data.empty()) {
+ DVLOG(2) << "Download from '" << model_url_ << "'' failed.";
+ return;
+ }
+
+ auto model = Parse(data);
+ if (!model) {
+ DVLOG(2) << "Model from '" << model_url_ << "'' not valid.";
+ return;
+ }
+
+ url_fetcher_.reset();
+
+ auto* metadata = model->mutable_metadata();
+ metadata->set_source(model_url_.spec());
+ metadata->set_last_modified_sec(
+ (base::Time::Now() - base::Time()).InSeconds());
+
+ if (!model_path_.empty()) {
+ DVLOG(2) << "Saving model from '" << model_url_ << "'' to '"
+ << model_path_.value() << "'.";
+ MyScopedHistogramTimer timer(uma_prefix_ + kWriteTimerHistogram);
+ base::ImportantFileWriter::WriteFileAtomically(model_path_,
+ model->SerializeAsString());
+ }
+
+ // Notify the owner that a compatible model is available.
+ TransferModelToObserver(std::move(model));
+}
+
+void RankerModelLoader::Backend::TransferModelToObserver(
+ std::unique_ptr<chrome_intelligence::RankerModel> model) {
+ DCHECK(sequence_checker_.CalledOnValidSequence());
+ origin_task_runner_->PostTask(
+ FROM_HERE,
+ base::Bind(on_model_available_callback_, base::Passed(std::move(model))));
+}
+
+} // namespace translate

Powered by Google App Engine
This is Rietveld 408576698