Chromium Code Reviews| Index: components/translate/core/browser/ranker_model_loader.cc |
| diff --git a/components/translate/core/browser/ranker_model_loader.cc b/components/translate/core/browser/ranker_model_loader.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..f969fea762e29301d928fe281e1b9daed6ba6f74 |
| --- /dev/null |
| +++ b/components/translate/core/browser/ranker_model_loader.cc |
| @@ -0,0 +1,381 @@ |
| +// Copyright 2016 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "components/translate/core/browser/ranker_model_loader.h" |
| + |
| +#include "base/bind.h" |
| +#include "base/bind_helpers.h" |
| +#include "base/command_line.h" |
| +#include "base/files/file_path.h" |
| +#include "base/files/file_util.h" |
| +#include "base/files/important_file_writer.h" |
| +#include "base/macros.h" |
| +#include "base/memory/ptr_util.h" |
| +#include "base/memory/ref_counted.h" |
| +#include "base/metrics/histogram_macros.h" |
| +#include "base/profiler/scoped_tracker.h" |
| +#include "base/strings/string_util.h" |
| +#include "base/task_scheduler/post_task.h" |
| +#include "base/threading/sequenced_task_runner_handle.h" |
| +#include "components/translate/core/browser/proto/ranker_model.pb.h" |
| +#include "components/translate/core/browser/translate_url_fetcher.h" |
| +#include "components/translate/core/common/translate_switches.h" |
| +#include "components/variations/variations_associated_data.h" |
| + |
| +namespace translate { |
| + |
| +namespace { |
| + |
| +const int kUrlFetcherId = 2; |
|
groby-ooo-7-16
2017/02/14 20:56:47
Why do we hardcode a fetcher ID? I've only seen th
fdoray
2017/02/15 20:07:40
constexpr here and below
Roger McFarlane (Chromium)
2017/02/21 22:16:49
I followed the other examples of the TranslateURLF
Roger McFarlane (Chromium)
2017/02/21 22:16:49
Done.
|
| + |
| +// The maximum number of model download attempts to make. Download may fail |
| +// due to server error or network availability issues. |
| +const int kMaxRetryOn5xx = 8; |
| + |
| +// The minimum duration, in minutes, between download attempts. |
| +const int kDownloadRefractoryPeriodMin = 3; |
|
groby-ooo-7-16
2017/02/14 20:56:47
1) since this is a duration, I'd love for this to
Roger McFarlane (Chromium)
2017/02/21 22:16:48
I'll make this an input param (via an options clas
|
| + |
| +// Suffixes for the various histograms produced by the backend. |
| +const char kWriteTimerHistogram[] = ".Timer.WriteModel"; |
| +const char kReadTimerHistogram[] = ".Timer.ReadModel"; |
| +const char kDownloadTimerHistogram[] = ".Timer.DownloadModel"; |
| +const char kParsetimerHistogram[] = ".Timer.ParseModel"; |
| +const char kModelStatusHistogram[] = ".Model.Status"; |
| + |
| +// A helper class to produce a scoped timer histogram that supports using a |
| +// non-static-const name. |
| +class MyScopedHistogramTimer { |
| + public: |
| + MyScopedHistogramTimer(const base::StringPiece& name) |
|
groby-ooo-7-16
2017/02/14 20:56:48
FWIW: I take it the UMA peeps are OK with dynamica
Roger McFarlane (Chromium)
2017/02/21 22:16:50
I got this recipe from the UMA folks, yes.
I'll a
|
| + : name_(name.begin(), name.end()), start_(base::TimeTicks::Now()) {} |
| + |
| + ~MyScopedHistogramTimer() { |
| + base::TimeDelta duration = base::TimeTicks::Now() - start_; |
| + base::HistogramBase* counter = base::Histogram::FactoryTimeGet( |
| + name_, base::TimeDelta::FromMilliseconds(10), |
| + base::TimeDelta::FromMilliseconds(200000), 100, |
| + base::HistogramBase::kUmaTargetedHistogramFlag); |
| + if (counter) |
| + counter->AddTime(duration); |
| + } |
| + |
| + private: |
| + const std::string name_; |
| + const base::TimeTicks start_; |
| + |
| + DISALLOW_COPY_AND_ASSIGN(MyScopedHistogramTimer); |
| +}; |
| + |
| +bool IsExpired(const chrome_intelligence::RankerModel& model) { |
|
groby-ooo-7-16
2017/02/14 20:56:47
Should this be a member on the model?
Roger McFarlane (Chromium)
2017/02/21 22:16:49
Done.
|
| + DCHECK(model.has_metadata()); |
| + const auto& metadata = model.metadata(); |
| + |
| + // If the age of the model cannot be determined, presume it to be expired. |
| + if (!metadata.has_last_modified_sec()) |
| + return true; |
| + |
| + // If the model has no set cache duration, then it never expires. |
| + if (!metadata.has_cache_duration_sec() || metadata.cache_duration_sec() == 0) |
| + return false; |
| + |
| + // Otherwise, a model is expired if its age exceeds the cache duration. |
| + base::Time last_modified = |
| + base::Time() + base::TimeDelta::FromSeconds(metadata.last_modified_sec()); |
| + base::TimeDelta age = base::Time::Now() - last_modified; |
| + return age > base::TimeDelta::FromSeconds(metadata.cache_duration_sec()); |
| +} |
| + |
| +} // namespace |
| + |
| +class RankerModelLoader::Backend { |
| + public: |
| + Backend(const ValidateCallback& validate_callback, |
| + const OnModelAvailableCallback& on_model_available_callback, |
| + const base::FilePath& model_path, |
| + const GURL& model_url, |
| + const std::string& uma_prefix); |
| + ~Backend(); |
| + |
| + // Reads the model from |model_path_|. |
| + void LoadFromFile(); |
| + |
| + // Reads the model |model_url_|. |
| + void LoadFromURL(); |
| + |
| + private: |
| + // Log the result of loading a model to UMA. |
| + void ReportModelStatus(RankerModelStatus model_status); |
| + |
| + // Called to construct a model from the given |data|. |
|
groby-ooo-7-16
2017/02/14 20:56:48
nit: Please don't use passive voice: "Constructs a
Roger McFarlane (Chromium)
2017/02/21 22:16:49
Done.
|
| + std::unique_ptr<chrome_intelligence::RankerModel> Parse( |
| + const std::string& data); |
| + |
| + // Called the download initiated by LoadFromURL() completes. |
|
groby-ooo-7-16
2017/02/14 20:56:47
Would you mind documenting |id|?
Roger McFarlane (Chromium)
2017/02/21 22:16:49
Done.
|
| + void OnDownloadComplete(int id, bool success, const std::string& data); |
| + |
| + // Transfer ownership of |model| to the |observer_|. |
|
hamelphi
2017/02/13 19:18:58
|observer_| is not defined in this class. Do you m
Roger McFarlane (Chromium)
2017/02/21 22:16:49
I updated the comment and method names to refer to
|
| + void TransferModelToObserver( |
| + std::unique_ptr<chrome_intelligence::RankerModel> model); |
| + |
| + // Validates that ranker model loader tasks are all performed on the correct |
| + // sequence. |
| + base::SequenceChecker sequence_checker_; |
| + |
| + // The TaskRunner on which this was constructed. |
|
groby-ooo-7-16
2017/02/14 20:56:48
nit: "On which |this| was constructed"
Roger McFarlane (Chromium)
2017/02/21 22:16:49
Done.
|
| + const scoped_refptr<base::SequencedTaskRunner> origin_task_runner_; |
| + |
| + // Validates a ranker model on behalf of the model loader client. This may |
| + // be called on any sequence and must, therefore, be thread-safe. |
| + const ValidateCallback validate_callback_; |
| + |
| + // Transfers ownership of a loaded model back to the model loader client. |
| + // This will be called on the sequence on which the model loader was |
| + // constructed. |
| + const OnModelAvailableCallback on_model_available_callback_; |
| + |
| + // The path at which the model is (or should be) cached. |
| + const base::FilePath model_path_; |
| + |
| + // The URL from which to download the model if the model is not in the cache |
| + // or the cached model is invalid/expired. |
| + const GURL model_url_; |
| + |
| + // This will prefix all UMA metrics generated by the model loader. |
| + const std::string uma_prefix_; |
| + |
| + // Used to download model data from |model_url_|. |
| + // TODO(rogerm): Use net::URLFetcher directly? |
| + std::unique_ptr<TranslateURLFetcher> url_fetcher_; |
| + |
| + // The next time before which no new attempts to download the model should be |
| + // attempted. |
| + base::TimeTicks next_earliest_download_time_; |
|
groby-ooo-7-16
2017/02/14 20:56:47
Why not just use a timer to schedule the next atte
Roger McFarlane (Chromium)
2017/02/21 22:16:49
I'm actually using this a throttle, not a deadline
|
| + |
| + // Tracks the last time of the last attempt to download a model. Used for UMA |
| + // reporting of download duration. |
| + base::TimeTicks download_start_time_; |
| + |
| + DISALLOW_COPY_AND_ASSIGN(Backend); |
| +}; |
| + |
| +RankerModelLoader::RankerModelLoader( |
| + const ValidateCallback& validate_callback, |
| + const OnModelAvailableCallback& on_model_available_callback, |
| + const base::FilePath& model_path, |
| + const GURL& model_url, |
| + const std::string& uma_prefix) |
| + : backend_task_runner_(base::CreateSequencedTaskRunnerWithTraits( |
| + base::TaskTraits().MayBlock())), |
|
fdoray
2017/02/15 20:07:40
Backend doesn't access memory that could be freed
Roger McFarlane (Chromium)
2017/02/21 22:16:50
Done.
The backend forwards downloaded models to t
|
| + backend_(base::MakeUnique<Backend>(validate_callback, |
| + on_model_available_callback, |
| + model_path, |
| + model_url, |
| + uma_prefix)) {} |
| + |
| +RankerModelLoader::~RankerModelLoader() { |
| + // This is guaranteed to be sequenced after any pending backend operation. |
| + backend_task_runner_->DeleteSoon(FROM_HERE, backend_.release()); |
| +} |
| + |
| +void RankerModelLoader::Start() { |
| + backend_task_runner_->PostTask( |
| + FROM_HERE, base::Bind(&RankerModelLoader::Backend::LoadFromFile, |
| + base::Unretained(backend_.get()))); |
| +} |
| + |
| +void RankerModelLoader::NotifyOfNetworkAvailability() { |
| + backend_task_runner_->PostTask( |
| + FROM_HERE, base::Bind(&RankerModelLoader::Backend::LoadFromURL, |
| + base::Unretained(backend_.get()))); |
| +} |
| + |
|
groby-ooo-7-16
2017/02/14 20:56:48
Maybe at least a double line separator to make it
Roger McFarlane (Chromium)
2017/02/21 22:16:48
Done.
|
| +RankerModelLoader::Backend::Backend( |
| + const ValidateCallback& validate_callback, |
| + const OnModelAvailableCallback& on_model_available_callback, |
| + const base::FilePath& model_path, |
| + const GURL& model_url, |
| + const std::string& uma_prefix) |
| + : origin_task_runner_(base::SequencedTaskRunnerHandle::Get()), |
| + validate_callback_(validate_callback), |
| + on_model_available_callback_(on_model_available_callback), |
| + model_path_(model_path), |
| + model_url_(model_url), |
| + uma_prefix_(uma_prefix) { |
| + sequence_checker_.DetachFromSequence(); |
| +} |
| + |
| +RankerModelLoader::Backend::~Backend() = default; |
|
groby-ooo-7-16
2017/02/14 20:56:47
curious - why
RankerModelLoader::Backend::~B
Roger McFarlane (Chromium)
2017/02/21 22:16:48
they're equivalent, so no particular reason.
|
| + |
| +void RankerModelLoader::Backend::ReportModelStatus( |
| + RankerModelStatus model_status) { |
| + base::HistogramBase* histogram = base::LinearHistogram::FactoryGet( |
| + uma_prefix_ + kModelStatusHistogram, 1, |
| + static_cast<int>(RankerModelStatus::MAX), |
| + static_cast<int>(RankerModelStatus::MAX) + 1, |
| + base::HistogramBase::kUmaTargetedHistogramFlag); |
| + if (histogram) |
| + histogram->Add(static_cast<int>(model_status)); |
| +} |
| + |
| +std::unique_ptr<chrome_intelligence::RankerModel> |
| +RankerModelLoader::Backend::Parse(const std::string& data) { |
| + DCHECK(sequence_checker_.CalledOnValidSequence()); |
| + MyScopedHistogramTimer timer(uma_prefix_ + kParsetimerHistogram); |
| + |
| + auto model = base::MakeUnique<chrome_intelligence::RankerModel>(); |
| + |
| + if (!model->ParseFromString(data)) { |
| + ReportModelStatus(RankerModelStatus::PARSE_FAILED); |
| + return nullptr; |
| + } |
| + |
| + RankerModelStatus model_status = validate_callback_.Run(*model); |
|
groby-ooo-7-16
2017/02/14 20:56:48
Thought: If the callback took a ptr, and you had a
Roger McFarlane (Chromium)
2017/02/21 22:16:49
Not quite. I might have a non-null, but invalid mo
|
| + ReportModelStatus(model_status); |
| + if (model_status != RankerModelStatus::OK) |
| + return nullptr; |
| + |
| + return model; |
| +} |
| + |
| +void RankerModelLoader::Backend::LoadFromFile() { |
| + DCHECK(sequence_checker_.CalledOnValidSequence()); |
| + // Attempt to read the model data from the cache file. |
| + std::string data; |
| + if (!model_path_.empty()) { |
| + DVLOG(2) << "Attempting to load model from: " << model_path_.value(); |
| + MyScopedHistogramTimer timer(uma_prefix_ + kReadTimerHistogram); |
| + if (!base::ReadFileToString(model_path_, &data)) { |
| + DVLOG(2) << "Failed to read model from: " << model_path_.value(); |
| + data.clear(); |
| + } |
| + } |
| + |
| + // If the model was successfully was read and is compatible, then notify |
| + // the "owner" of this model loader of the models availability (transferring |
| + // ownership of the model). If the model originated at the model_url_ and has |
| + // not expired then there is no need to download a new one. |
| + if (!data.empty()) { |
| + std::unique_ptr<chrome_intelligence::RankerModel> model = Parse(data); |
| + if (model) { |
| + std::string url_spec = model->metadata().source(); |
| + bool is_expired = IsExpired(*model); |
| + DVLOG(2) << (is_expired ? "Expired m" : "M") << "odel in '" |
| + << model_path_.value() << "'' was originally downloaded from '" |
| + << url_spec << "'."; |
| + |
| + TransferModelToObserver(std::move(model)); |
| + |
| + if (url_spec == model_url_.spec() && !is_expired) |
|
hamelphi
2017/02/13 19:18:58
Maybe comment on what it means if either condition
Roger McFarlane (Chromium)
2017/02/21 22:16:49
Done.
|
| + return; |
| + } |
| + } |
| + |
| + // Reaching this point means that a model download is required. If there is |
| + // no download URL configured, then there is nothing further to do. |
| + if (!model_url_.is_valid()) |
| + return; |
| + |
| + // Otherwise, initialize the model fetcher to be non-null and trigger an |
| + // initial download attempt. |
| + url_fetcher_ = base::MakeUnique<TranslateURLFetcher>(kUrlFetcherId); |
| + url_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx); |
| + LoadFromURL(); |
|
groby-ooo-7-16
2017/02/14 20:56:48
FWIW, LoadFromURL and LoadFromFile have slightly a
Roger McFarlane (Chromium)
2017/02/21 22:16:50
I made them a bit more symmetric.
url_fetcher_ ==
|
| +} |
| + |
| +void RankerModelLoader::Backend::LoadFromURL() { |
| + DCHECK(sequence_checker_.CalledOnValidSequence()); |
| + // Immediately exit if there is no download pending. |
| + if (!url_fetcher_) |
| + return; |
| + |
| + // If a request is already in flight, do not issue a new one. |
|
groby-ooo-7-16
2017/02/14 20:56:47
IIUC, we only need this because we're currently us
Roger McFarlane (Chromium)
2017/02/21 22:16:49
By transition-based approach, you mean watching th
|
| + if (url_fetcher_->state() == TranslateURLFetcher::REQUESTING) { |
| + DVLOG(2) << "RankerModelLoader: Download is in progress."; |
| + return; |
| + } |
| + // Do nothing if the download attempts should be throttled. |
| + if (base::TimeTicks::Now() < next_earliest_download_time_) { |
| + DVLOG(2) << "ModelLoadser: Last download attempt was too recent."; |
|
hamelphi
2017/02/13 19:18:58
s\ModelLoadser\ModelLoader
Roger McFarlane (Chromium)
2017/02/21 22:16:50
Done.
|
| + return; |
| + } |
| + |
| + DVLOG(2) << "Downloading model from: " << model_url_; |
| + |
| + // Reset the time of the next earliest allowable download attempt. |
| + next_earliest_download_time_ = |
| + base::TimeTicks::Now() + |
| + base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin); |
| + |
| + // Kick off the next download attempt. |
| + download_start_time_ = base::TimeTicks::Now(); |
| + bool result = url_fetcher_->Request( |
| + model_url_, base::Bind(&RankerModelLoader::Backend::OnDownloadComplete, |
| + base::Unretained(this))); |
| + |
| + // The maximum number of download attempts has been surpassed. Don't make |
| + // any further attempts. |
| + if (!result) { |
| + DVLOG(2) << "Model download abandoned."; |
| + ReportModelStatus(RankerModelStatus::DOWNLOAD_FAILED); |
| + url_fetcher_.reset(); |
| + } |
| +} |
| + |
| +void RankerModelLoader::Backend::OnDownloadComplete(int /* id */, |
| + bool success, |
| + const std::string& data) { |
| + DCHECK(sequence_checker_.CalledOnValidSequence()); |
| + |
| + // Record the duration of the download. |
| + base::TimeDelta duration = base::TimeTicks::Now() - download_start_time_; |
| + base::HistogramBase* counter = base::Histogram::FactoryTimeGet( |
| + uma_prefix_ + kDownloadTimerHistogram, |
| + base::TimeDelta::FromMilliseconds(10), |
| + base::TimeDelta::FromMilliseconds(200000), 100, |
| + base::HistogramBase::kUmaTargetedHistogramFlag); |
| + if (counter) |
| + counter->AddTime(duration); |
| + |
| + // On failure, we just abort. The TranslateRanker will retry on a subsequent |
| + // translation opportunity. The TranslateURLFetcher enforces a limit for |
|
groby-ooo-7-16
2017/02/14 20:56:48
Should we instead retry once network is available?
Roger McFarlane (Chromium)
2017/02/21 22:16:49
See my previous comments/thoughts/questions about
|
| + // retried requests. |
| + if (!success || data.empty()) { |
| + DVLOG(2) << "Download from '" << model_url_ << "'' failed."; |
| + return; |
| + } |
| + |
| + auto model = Parse(data); |
| + if (!model) { |
| + DVLOG(2) << "Model from '" << model_url_ << "'' not valid."; |
| + return; |
| + } |
| + |
| + url_fetcher_.reset(); |
| + |
| + auto* metadata = model->mutable_metadata(); |
| + metadata->set_source(model_url_.spec()); |
| + metadata->set_last_modified_sec( |
| + (base::Time::Now() - base::Time()).InSeconds()); |
| + |
| + if (!model_path_.empty()) { |
| + DVLOG(2) << "Saving model from '" << model_url_ << "'' to '" |
| + << model_path_.value() << "'."; |
| + MyScopedHistogramTimer timer(uma_prefix_ + kWriteTimerHistogram); |
| + base::ImportantFileWriter::WriteFileAtomically(model_path_, |
| + model->SerializeAsString()); |
| + } |
| + |
| + // Notify the owner that a compatible model is available. |
| + TransferModelToObserver(std::move(model)); |
| +} |
| + |
| +void RankerModelLoader::Backend::TransferModelToObserver( |
| + std::unique_ptr<chrome_intelligence::RankerModel> model) { |
| + DCHECK(sequence_checker_.CalledOnValidSequence()); |
| + origin_task_runner_->PostTask( |
| + FROM_HERE, |
| + base::Bind(on_model_available_callback_, base::Passed(std::move(model)))); |
| +} |
| + |
| +} // namespace translate |