Index: components/translate/core/browser/ranker_model_loader.cc |
diff --git a/components/translate/core/browser/ranker_model_loader.cc b/components/translate/core/browser/ranker_model_loader.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..f969fea762e29301d928fe281e1b9daed6ba6f74 |
--- /dev/null |
+++ b/components/translate/core/browser/ranker_model_loader.cc |
@@ -0,0 +1,381 @@ |
+// Copyright 2016 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "components/translate/core/browser/ranker_model_loader.h" |
+ |
+#include "base/bind.h" |
+#include "base/bind_helpers.h" |
+#include "base/command_line.h" |
+#include "base/files/file_path.h" |
+#include "base/files/file_util.h" |
+#include "base/files/important_file_writer.h" |
+#include "base/macros.h" |
+#include "base/memory/ptr_util.h" |
+#include "base/memory/ref_counted.h" |
+#include "base/metrics/histogram_macros.h" |
+#include "base/profiler/scoped_tracker.h" |
+#include "base/strings/string_util.h" |
+#include "base/task_scheduler/post_task.h" |
+#include "base/threading/sequenced_task_runner_handle.h" |
+#include "components/translate/core/browser/proto/ranker_model.pb.h" |
+#include "components/translate/core/browser/translate_url_fetcher.h" |
+#include "components/translate/core/common/translate_switches.h" |
+#include "components/variations/variations_associated_data.h" |
+ |
+namespace translate { |
+ |
+namespace { |
+ |
+const int kUrlFetcherId = 2; |
groby-ooo-7-16
2017/02/14 20:56:47
Why do we hardcode a fetcher ID? I've only seen th
fdoray
2017/02/15 20:07:40
constexpr here and below
Roger McFarlane (Chromium)
2017/02/21 22:16:49
I followed the other examples of the TranslateURLF
Roger McFarlane (Chromium)
2017/02/21 22:16:49
Done.
|
+ |
+// The maximum number of model download attempts to make. Download may fail |
+// due to server error or network availability issues. |
+const int kMaxRetryOn5xx = 8; |
+ |
+// The minimum duration, in minutes, between download attempts. |
+const int kDownloadRefractoryPeriodMin = 3; |
groby-ooo-7-16
2017/02/14 20:56:47
1) since this is a duration, I'd love for this to
Roger McFarlane (Chromium)
2017/02/21 22:16:48
I'll make this an input param (via an options clas
|
+ |
+// Suffixes for the various histograms produced by the backend. |
+const char kWriteTimerHistogram[] = ".Timer.WriteModel"; |
+const char kReadTimerHistogram[] = ".Timer.ReadModel"; |
+const char kDownloadTimerHistogram[] = ".Timer.DownloadModel"; |
+const char kParsetimerHistogram[] = ".Timer.ParseModel"; |
+const char kModelStatusHistogram[] = ".Model.Status"; |
+ |
+// A helper class to produce a scoped timer histogram that supports using a |
+// non-static-const name. |
+class MyScopedHistogramTimer { |
+ public: |
+ MyScopedHistogramTimer(const base::StringPiece& name) |
groby-ooo-7-16
2017/02/14 20:56:48
FWIW: I take it the UMA peeps are OK with dynamica
Roger McFarlane (Chromium)
2017/02/21 22:16:50
I got this recipe from the UMA folks, yes.
I'll a
|
+ : name_(name.begin(), name.end()), start_(base::TimeTicks::Now()) {} |
+ |
+ ~MyScopedHistogramTimer() { |
+ base::TimeDelta duration = base::TimeTicks::Now() - start_; |
+ base::HistogramBase* counter = base::Histogram::FactoryTimeGet( |
+ name_, base::TimeDelta::FromMilliseconds(10), |
+ base::TimeDelta::FromMilliseconds(200000), 100, |
+ base::HistogramBase::kUmaTargetedHistogramFlag); |
+ if (counter) |
+ counter->AddTime(duration); |
+ } |
+ |
+ private: |
+ const std::string name_; |
+ const base::TimeTicks start_; |
+ |
+ DISALLOW_COPY_AND_ASSIGN(MyScopedHistogramTimer); |
+}; |
+ |
+bool IsExpired(const chrome_intelligence::RankerModel& model) { |
groby-ooo-7-16
2017/02/14 20:56:47
Should this be a member on the model?
Roger McFarlane (Chromium)
2017/02/21 22:16:49
Done.
|
+ DCHECK(model.has_metadata()); |
+ const auto& metadata = model.metadata(); |
+ |
+ // If the age of the model cannot be determined, presume it to be expired. |
+ if (!metadata.has_last_modified_sec()) |
+ return true; |
+ |
+ // If the model has no set cache duration, then it never expires. |
+ if (!metadata.has_cache_duration_sec() || metadata.cache_duration_sec() == 0) |
+ return false; |
+ |
+ // Otherwise, a model is expired if its age exceeds the cache duration. |
+ base::Time last_modified = |
+ base::Time() + base::TimeDelta::FromSeconds(metadata.last_modified_sec()); |
+ base::TimeDelta age = base::Time::Now() - last_modified; |
+ return age > base::TimeDelta::FromSeconds(metadata.cache_duration_sec()); |
+} |
+ |
+} // namespace |
+ |
+class RankerModelLoader::Backend { |
+ public: |
+ Backend(const ValidateCallback& validate_callback, |
+ const OnModelAvailableCallback& on_model_available_callback, |
+ const base::FilePath& model_path, |
+ const GURL& model_url, |
+ const std::string& uma_prefix); |
+ ~Backend(); |
+ |
+ // Reads the model from |model_path_|. |
+ void LoadFromFile(); |
+ |
+ // Reads the model |model_url_|. |
+ void LoadFromURL(); |
+ |
+ private: |
+ // Log the result of loading a model to UMA. |
+ void ReportModelStatus(RankerModelStatus model_status); |
+ |
+ // Called to construct a model from the given |data|. |
groby-ooo-7-16
2017/02/14 20:56:48
nit: Please don't use passive voice: "Constructs a
Roger McFarlane (Chromium)
2017/02/21 22:16:49
Done.
|
+ std::unique_ptr<chrome_intelligence::RankerModel> Parse( |
+ const std::string& data); |
+ |
+ // Called the download initiated by LoadFromURL() completes. |
groby-ooo-7-16
2017/02/14 20:56:47
Would you mind documenting |id|?
Roger McFarlane (Chromium)
2017/02/21 22:16:49
Done.
|
+ void OnDownloadComplete(int id, bool success, const std::string& data); |
+ |
+ // Transfer ownership of |model| to the |observer_|. |
hamelphi
2017/02/13 19:18:58
|observer_| is not defined in this class. Do you m
Roger McFarlane (Chromium)
2017/02/21 22:16:49
I updated the comment and method names to refer to
|
+ void TransferModelToObserver( |
+ std::unique_ptr<chrome_intelligence::RankerModel> model); |
+ |
+ // Validates that ranker model loader tasks are all performed on the correct |
+ // sequence. |
+ base::SequenceChecker sequence_checker_; |
+ |
+ // The TaskRunner on which this was constructed. |
groby-ooo-7-16
2017/02/14 20:56:48
nit: "On which |this| was constructed"
Roger McFarlane (Chromium)
2017/02/21 22:16:49
Done.
|
+ const scoped_refptr<base::SequencedTaskRunner> origin_task_runner_; |
+ |
+ // Validates a ranker model on behalf of the model loader client. This may |
+ // be called on any sequence and must, therefore, be thread-safe. |
+ const ValidateCallback validate_callback_; |
+ |
+ // Transfers ownership of a loaded model back to the model loader client. |
+ // This will be called on the sequence on which the model loader was |
+ // constructed. |
+ const OnModelAvailableCallback on_model_available_callback_; |
+ |
+ // The path at which the model is (or should be) cached. |
+ const base::FilePath model_path_; |
+ |
+ // The URL from which to download the model if the model is not in the cache |
+ // or the cached model is invalid/expired. |
+ const GURL model_url_; |
+ |
+ // This will prefix all UMA metrics generated by the model loader. |
+ const std::string uma_prefix_; |
+ |
+ // Used to download model data from |model_url_|. |
+ // TODO(rogerm): Use net::URLFetcher directly? |
+ std::unique_ptr<TranslateURLFetcher> url_fetcher_; |
+ |
+ // The next time before which no new attempts to download the model should be |
+ // attempted. |
+ base::TimeTicks next_earliest_download_time_; |
groby-ooo-7-16
2017/02/14 20:56:47
Why not just use a timer to schedule the next atte
Roger McFarlane (Chromium)
2017/02/21 22:16:49
I'm actually using this a throttle, not a deadline
|
+ |
+ // Tracks the last time of the last attempt to download a model. Used for UMA |
+ // reporting of download duration. |
+ base::TimeTicks download_start_time_; |
+ |
+ DISALLOW_COPY_AND_ASSIGN(Backend); |
+}; |
+ |
+RankerModelLoader::RankerModelLoader( |
+ const ValidateCallback& validate_callback, |
+ const OnModelAvailableCallback& on_model_available_callback, |
+ const base::FilePath& model_path, |
+ const GURL& model_url, |
+ const std::string& uma_prefix) |
+ : backend_task_runner_(base::CreateSequencedTaskRunnerWithTraits( |
+ base::TaskTraits().MayBlock())), |
fdoray
2017/02/15 20:07:40
Backend doesn't access memory that could be freed
Roger McFarlane (Chromium)
2017/02/21 22:16:50
Done.
The backend forwards downloaded models to t
|
+ backend_(base::MakeUnique<Backend>(validate_callback, |
+ on_model_available_callback, |
+ model_path, |
+ model_url, |
+ uma_prefix)) {} |
+ |
+RankerModelLoader::~RankerModelLoader() { |
+ // This is guaranteed to be sequenced after any pending backend operation. |
+ backend_task_runner_->DeleteSoon(FROM_HERE, backend_.release()); |
+} |
+ |
+void RankerModelLoader::Start() { |
+ backend_task_runner_->PostTask( |
+ FROM_HERE, base::Bind(&RankerModelLoader::Backend::LoadFromFile, |
+ base::Unretained(backend_.get()))); |
+} |
+ |
+void RankerModelLoader::NotifyOfNetworkAvailability() { |
+ backend_task_runner_->PostTask( |
+ FROM_HERE, base::Bind(&RankerModelLoader::Backend::LoadFromURL, |
+ base::Unretained(backend_.get()))); |
+} |
+ |
groby-ooo-7-16
2017/02/14 20:56:48
Maybe at least a double line separator to make it
Roger McFarlane (Chromium)
2017/02/21 22:16:48
Done.
|
+RankerModelLoader::Backend::Backend( |
+ const ValidateCallback& validate_callback, |
+ const OnModelAvailableCallback& on_model_available_callback, |
+ const base::FilePath& model_path, |
+ const GURL& model_url, |
+ const std::string& uma_prefix) |
+ : origin_task_runner_(base::SequencedTaskRunnerHandle::Get()), |
+ validate_callback_(validate_callback), |
+ on_model_available_callback_(on_model_available_callback), |
+ model_path_(model_path), |
+ model_url_(model_url), |
+ uma_prefix_(uma_prefix) { |
+ sequence_checker_.DetachFromSequence(); |
+} |
+ |
+RankerModelLoader::Backend::~Backend() = default; |
groby-ooo-7-16
2017/02/14 20:56:47
curious - why
RankerModelLoader::Backend::~B
Roger McFarlane (Chromium)
2017/02/21 22:16:48
they're equivalent, so no particular reason.
|
+ |
+void RankerModelLoader::Backend::ReportModelStatus( |
+ RankerModelStatus model_status) { |
+ base::HistogramBase* histogram = base::LinearHistogram::FactoryGet( |
+ uma_prefix_ + kModelStatusHistogram, 1, |
+ static_cast<int>(RankerModelStatus::MAX), |
+ static_cast<int>(RankerModelStatus::MAX) + 1, |
+ base::HistogramBase::kUmaTargetedHistogramFlag); |
+ if (histogram) |
+ histogram->Add(static_cast<int>(model_status)); |
+} |
+ |
+std::unique_ptr<chrome_intelligence::RankerModel> |
+RankerModelLoader::Backend::Parse(const std::string& data) { |
+ DCHECK(sequence_checker_.CalledOnValidSequence()); |
+ MyScopedHistogramTimer timer(uma_prefix_ + kParsetimerHistogram); |
+ |
+ auto model = base::MakeUnique<chrome_intelligence::RankerModel>(); |
+ |
+ if (!model->ParseFromString(data)) { |
+ ReportModelStatus(RankerModelStatus::PARSE_FAILED); |
+ return nullptr; |
+ } |
+ |
+ RankerModelStatus model_status = validate_callback_.Run(*model); |
groby-ooo-7-16
2017/02/14 20:56:48
Thought: If the callback took a ptr, and you had a
Roger McFarlane (Chromium)
2017/02/21 22:16:49
Not quite. I might have a non-null, but invalid mo
|
+ ReportModelStatus(model_status); |
+ if (model_status != RankerModelStatus::OK) |
+ return nullptr; |
+ |
+ return model; |
+} |
+ |
+void RankerModelLoader::Backend::LoadFromFile() { |
+ DCHECK(sequence_checker_.CalledOnValidSequence()); |
+ // Attempt to read the model data from the cache file. |
+ std::string data; |
+ if (!model_path_.empty()) { |
+ DVLOG(2) << "Attempting to load model from: " << model_path_.value(); |
+ MyScopedHistogramTimer timer(uma_prefix_ + kReadTimerHistogram); |
+ if (!base::ReadFileToString(model_path_, &data)) { |
+ DVLOG(2) << "Failed to read model from: " << model_path_.value(); |
+ data.clear(); |
+ } |
+ } |
+ |
+ // If the model was successfully was read and is compatible, then notify |
+ // the "owner" of this model loader of the models availability (transferring |
+ // ownership of the model). If the model originated at the model_url_ and has |
+ // not expired then there is no need to download a new one. |
+ if (!data.empty()) { |
+ std::unique_ptr<chrome_intelligence::RankerModel> model = Parse(data); |
+ if (model) { |
+ std::string url_spec = model->metadata().source(); |
+ bool is_expired = IsExpired(*model); |
+ DVLOG(2) << (is_expired ? "Expired m" : "M") << "odel in '" |
+ << model_path_.value() << "'' was originally downloaded from '" |
+ << url_spec << "'."; |
+ |
+ TransferModelToObserver(std::move(model)); |
+ |
+ if (url_spec == model_url_.spec() && !is_expired) |
hamelphi
2017/02/13 19:18:58
Maybe comment on what it means if either condition
Roger McFarlane (Chromium)
2017/02/21 22:16:49
Done.
|
+ return; |
+ } |
+ } |
+ |
+ // Reaching this point means that a model download is required. If there is |
+ // no download URL configured, then there is nothing further to do. |
+ if (!model_url_.is_valid()) |
+ return; |
+ |
+ // Otherwise, initialize the model fetcher to be non-null and trigger an |
+ // initial download attempt. |
+ url_fetcher_ = base::MakeUnique<TranslateURLFetcher>(kUrlFetcherId); |
+ url_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx); |
+ LoadFromURL(); |
groby-ooo-7-16
2017/02/14 20:56:48
FWIW, LoadFromURL and LoadFromFile have slightly a
Roger McFarlane (Chromium)
2017/02/21 22:16:50
I made them a bit more symmetric.
url_fetcher_ ==
|
+} |
+ |
+void RankerModelLoader::Backend::LoadFromURL() { |
+ DCHECK(sequence_checker_.CalledOnValidSequence()); |
+ // Immediately exit if there is no download pending. |
+ if (!url_fetcher_) |
+ return; |
+ |
+ // If a request is already in flight, do not issue a new one. |
groby-ooo-7-16
2017/02/14 20:56:47
IIUC, we only need this because we're currently us
Roger McFarlane (Chromium)
2017/02/21 22:16:49
By transition-based approach, you mean watching th
|
+ if (url_fetcher_->state() == TranslateURLFetcher::REQUESTING) { |
+ DVLOG(2) << "RankerModelLoader: Download is in progress."; |
+ return; |
+ } |
+ // Do nothing if the download attempts should be throttled. |
+ if (base::TimeTicks::Now() < next_earliest_download_time_) { |
+ DVLOG(2) << "ModelLoadser: Last download attempt was too recent."; |
hamelphi
2017/02/13 19:18:58
s\ModelLoadser\ModelLoader
Roger McFarlane (Chromium)
2017/02/21 22:16:50
Done.
|
+ return; |
+ } |
+ |
+ DVLOG(2) << "Downloading model from: " << model_url_; |
+ |
+ // Reset the time of the next earliest allowable download attempt. |
+ next_earliest_download_time_ = |
+ base::TimeTicks::Now() + |
+ base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin); |
+ |
+ // Kick off the next download attempt. |
+ download_start_time_ = base::TimeTicks::Now(); |
+ bool result = url_fetcher_->Request( |
+ model_url_, base::Bind(&RankerModelLoader::Backend::OnDownloadComplete, |
+ base::Unretained(this))); |
+ |
+ // The maximum number of download attempts has been surpassed. Don't make |
+ // any further attempts. |
+ if (!result) { |
+ DVLOG(2) << "Model download abandoned."; |
+ ReportModelStatus(RankerModelStatus::DOWNLOAD_FAILED); |
+ url_fetcher_.reset(); |
+ } |
+} |
+ |
+void RankerModelLoader::Backend::OnDownloadComplete(int /* id */, |
+ bool success, |
+ const std::string& data) { |
+ DCHECK(sequence_checker_.CalledOnValidSequence()); |
+ |
+ // Record the duration of the download. |
+ base::TimeDelta duration = base::TimeTicks::Now() - download_start_time_; |
+ base::HistogramBase* counter = base::Histogram::FactoryTimeGet( |
+ uma_prefix_ + kDownloadTimerHistogram, |
+ base::TimeDelta::FromMilliseconds(10), |
+ base::TimeDelta::FromMilliseconds(200000), 100, |
+ base::HistogramBase::kUmaTargetedHistogramFlag); |
+ if (counter) |
+ counter->AddTime(duration); |
+ |
+ // On failure, we just abort. The TranslateRanker will retry on a subsequent |
+ // translation opportunity. The TranslateURLFetcher enforces a limit for |
groby-ooo-7-16
2017/02/14 20:56:48
Should we instead retry once network is available?
Roger McFarlane (Chromium)
2017/02/21 22:16:49
See my previous comments/thoughts/questions about
|
+ // retried requests. |
+ if (!success || data.empty()) { |
+ DVLOG(2) << "Download from '" << model_url_ << "'' failed."; |
+ return; |
+ } |
+ |
+ auto model = Parse(data); |
+ if (!model) { |
+ DVLOG(2) << "Model from '" << model_url_ << "'' not valid."; |
+ return; |
+ } |
+ |
+ url_fetcher_.reset(); |
+ |
+ auto* metadata = model->mutable_metadata(); |
+ metadata->set_source(model_url_.spec()); |
+ metadata->set_last_modified_sec( |
+ (base::Time::Now() - base::Time()).InSeconds()); |
+ |
+ if (!model_path_.empty()) { |
+ DVLOG(2) << "Saving model from '" << model_url_ << "'' to '" |
+ << model_path_.value() << "'."; |
+ MyScopedHistogramTimer timer(uma_prefix_ + kWriteTimerHistogram); |
+ base::ImportantFileWriter::WriteFileAtomically(model_path_, |
+ model->SerializeAsString()); |
+ } |
+ |
+ // Notify the owner that a compatible model is available. |
+ TransferModelToObserver(std::move(model)); |
+} |
+ |
+void RankerModelLoader::Backend::TransferModelToObserver( |
+ std::unique_ptr<chrome_intelligence::RankerModel> model) { |
+ DCHECK(sequence_checker_.CalledOnValidSequence()); |
+ origin_task_runner_->PostTask( |
+ FROM_HERE, |
+ base::Bind(on_model_available_callback_, base::Passed(std::move(model)))); |
+} |
+ |
+} // namespace translate |