Chromium Code Reviews| Index: components/translate/core/browser/translate_ranker.cc |
| diff --git a/components/translate/core/browser/translate_ranker.cc b/components/translate/core/browser/translate_ranker.cc |
| index fe2a214ee397936d1210379f595bfeac9c99b616..19b161e0e85ecc5d33509a6cd0266642ddc01208 100644 |
| --- a/components/translate/core/browser/translate_ranker.cc |
| +++ b/components/translate/core/browser/translate_ranker.cc |
| @@ -9,44 +9,50 @@ |
| #include "base/bind.h" |
| #include "base/bind_helpers.h" |
| #include "base/command_line.h" |
| +#include "base/files/file_path.h" |
| +#include "base/files/file_util.h" |
| #include "base/metrics/histogram_macros.h" |
| #include "base/profiler/scoped_tracker.h" |
| +#include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_util.h" |
| #include "components/metrics/proto/translate_event.pb.h" |
| #include "components/translate/core/browser/proto/translate_ranker_model.pb.h" |
| #include "components/translate/core/browser/translate_download_manager.h" |
| #include "components/translate/core/browser/translate_prefs.h" |
| +#include "components/translate/core/browser/translate_ranker_model_loader.h" |
| #include "components/translate/core/browser/translate_url_fetcher.h" |
| #include "components/translate/core/common/translate_switches.h" |
| +#include "components/variations/variations_associated_data.h" |
| + |
| +#if defined(OS_WIN) |
| +#include "base/base_paths_win.h" |
| +#include "base/path_service.h" |
| +#include "base/strings/utf_string_conversions.h" |
| +#elif defined(OS_ANDROID) |
| +#include "base/base_paths_android.h" |
| +#include "base/path_service.h" |
| +#elif defined(OS_LINUX) |
| +#include "base/environment.h" |
| +#include "base/nix/xdg_util.h" |
| +#elif defined(OS_MACOSX) |
| +#include "base/base_paths_mac.h" |
| +#include "base/path_service.h" |
| +#endif |
| namespace translate { |
| namespace { |
| +using chrome_intelligence::TranslateRankerModel; |
| + |
| +const char kUserDataDirSwitch[] = "user-data-dir"; |
|
pasko
2016/12/19 14:26:49
this looks like the wrong layer to do it. kUserDat
Roger McFarlane (Chromium)
2017/02/08 23:08:07
I've turned the ranker into a KeyedService and con
|
| +const char kTranslateRankerModelFileName[] = "Translate Ranker Model"; |
| typedef google::protobuf::Map<std::string, float> WeightMap; |
| const double kTranslationOfferThreshold = 0.5; |
| -// Parameters for model fetching. |
| -const char kTranslateRankerModelURL[] = |
| - "https://chromium-i18n.appspot.com/ssl-translate-ranker-model"; |
| -const int kMaxRetryOn5xx = 3; |
| -const int kDownloadRefractoryPeriodMin = 15; |
| const char kUnknown[] = "UNKNOWN"; |
| -// Enumeration denoting the outcome of an attempt to download the translate |
| -// ranker model. This must be kept in sync with the TranslateRankerModelStatus |
| -// enum in histograms.xml |
| -enum ModelStatus { |
| - MODEL_STATUS_OK = 0, |
| - MODEL_STATUS_DOWNLOAD_THROTTLED = 1, |
| - MODEL_STATUS_DOWNLOAD_FAILED = 2, |
| - MODEL_STATUS_PARSE_FAILED = 3, |
| - MODEL_STATUS_VALIDATION_FAILED = 4, |
| - // Insert new values above this line. |
| - MODEL_STATUS_MAX |
| -}; |
| - |
| double Sigmoid(double x) { |
| return 1.0 / (1.0 + exp(-x)); |
| } |
| @@ -58,17 +64,94 @@ double ScoreComponent(const WeightMap& weights, const std::string& key) { |
| return i == weights.end() ? 0.0 : i->second; |
| } |
| -GURL GetTranslateRankerURL() { |
| +// Picks the URL the ranker model is downloaded from. A value supplied via the |
| +// switches::kTranslateRankerModelURL command-line switch wins; without it the |
| +// same-named parameter of the kTranslateRankerQuery variation is used. |
| +GURL GetTranslateRankerModelURL() { |
| + base::CommandLine* cmd = base::CommandLine::ForCurrentProcess(); |
| + std::string url_spec; |
| + if (cmd->HasSwitch(switches::kTranslateRankerModelURL)) { |
| + // Command-line override. |
| + url_spec = cmd->GetSwitchValueASCII(switches::kTranslateRankerModelURL); |
| + } else { |
| + // No override: read the URL from the variation parameters and log it. |
| + url_spec = variations::GetVariationParamValueByFeature( |
| + kTranslateRankerQuery, switches::kTranslateRankerModelURL); |
| + DVLOG(3) << switches::kTranslateRankerModelURL << " = " << url_spec; |
| + } |
| + return GURL(url_spec); |
| +} |
| + |
| +base::FilePath GetUserDataDir() { |
| + base::FilePath path; |
| base::CommandLine* command_line = base::CommandLine::ForCurrentProcess(); |
| - return GURL(command_line->HasSwitch(switches::kTranslateRankerModelURL) |
| - ? command_line->GetSwitchValueASCII( |
| - switches::kTranslateRankerModelURL) |
| - : kTranslateRankerModelURL); |
| + if (command_line->HasSwitch(kUserDataDirSwitch)) { |
| + path = command_line->GetSwitchValuePath(kUserDataDirSwitch); |
| + } else { |
| +#if defined(OS_WIN) |
| + CHECK(PathService::Get(base::DIR_LOCAL_APP_DATA, &path)); |
| +#elif defined(OS_MACOSX) |
| + CHECK(PathService::Get(base::DIR_APP_DATA, &path)); |
| +#elif defined(OS_ANDROID) |
| + CHECK(PathService::Get(base::DIR_ANDROID_APP_DATA, &path)); |
| +#elif defined(OS_LINUX) |
| + std::unique_ptr<base::Environment> env(base::Environment::Create()); |
| + path = base::nix::GetXDGDirectory( |
| + env.get(), base::nix::kXdgConfigHomeEnvVar, base::nix::kDotConfigDir); |
| +#else |
| + NOTIMPLEMENTED(); |
| +#endif |
| + } |
| + return path; |
| } |
| -void ReportModelStatus(ModelStatus model_status) { |
| - UMA_HISTOGRAM_ENUMERATION("Translate.Ranker.Model.Status", model_status, |
| - MODEL_STATUS_MAX); |
| +// Returns the on-disk location of the ranker model: the value of the |
| +// switches::kTranslateRankerModelPath switch when given, otherwise the file |
| +// named kTranslateRankerModelFileName directly under GetUserDataDir(). |
| +base::FilePath GetTranslateRankerModelPath() { |
| + base::CommandLine* cmd = base::CommandLine::ForCurrentProcess(); |
| + |
| + // Default location: the top-level user data dir. |
| + if (!cmd->HasSwitch(switches::kTranslateRankerModelPath)) |
| + return GetUserDataDir().AppendASCII(kTranslateRankerModelFileName); |
| + |
| + // The command line overrides the model path. |
| + return base::FilePath( |
| + cmd->GetSwitchValueNative(switches::kTranslateRankerModelPath)); |
| +} |
| + |
| +// Reports whether |model| can be used as-is or a fresh download is needed. |
| +// Command-line switches take precedence; otherwise the model's version is |
| +// compared with the "translate-ranker-model-version" parameter of the |
| +// kTranslateRankerQuery variation. |
| +bool IsUpToDate(const TranslateRankerModel& model) { |
| + base::CommandLine* cmd = base::CommandLine::ForCurrentProcess(); |
| + |
| + // A model forcibly loaded from an explicit path is treated as up-to-date so |
| + // that no newer model is downloaded over it. |
| + if (cmd->HasSwitch(switches::kTranslateRankerModelPath)) |
| + return true; |
| + |
| + // When a download URL is forced, any model loaded from disk is treated as |
| + // out-of-date. |
| + if (cmd->HasSwitch(switches::kTranslateRankerModelURL)) |
| + return false; |
| + |
| + // Otherwise take the expected ranker model version from the ranker query |
| + // variation. |
| + const std::string wanted_version_str = |
| + variations::GetVariationParamValueByFeature( |
| + kTranslateRankerQuery, "translate-ranker-model-version"); |
| + |
| + DVLOG(2) << "Expected version = '" << wanted_version_str << "'"; |
| + DVLOG(2) << "Received version = '" << model.version() << "'"; |
| + |
| + // Up-to-date only if the expected version parses as an unsigned integer and |
| + // the model carries exactly that version. |
| + unsigned wanted_version; |
| + if (!base::StringToUint(wanted_version_str, &wanted_version)) |
| + return false; |
| + if (!model.has_version()) |
| + return false; |
| + return model.version() == wanted_version; |
| +} |
| + |
| +// Returns true when |model| contains a logistic regression model that has a |
| +// bias, an accept-ratio weight and a decline-ratio weight set. |
| +bool IsCompatible(const chrome_intelligence::TranslateRankerModel& model) { |
| + if (model.has_logistic_regression_model()) { |
| + const TranslateRankerModel::LogisticRegressionModel& lr_model = |
| + model.logistic_regression_model(); |
| + const bool has_required_fields = lr_model.has_bias() && |
| + lr_model.has_accept_ratio_weight() && |
| + lr_model.has_decline_ratio_weight(); |
| + return has_required_fields; |
| + } |
| + |
| + // No logistic regression model at all. |
| + return false; |
| } |
| } // namespace |
| @@ -109,15 +192,38 @@ TranslateRanker* TranslateRanker::GetInstance() { |
| return base::Singleton<TranslateRanker>::get(); |
| } |
| +// static |
| std::unique_ptr<TranslateRanker> TranslateRanker::CreateForTesting( |
| const std::string& model_data) { |
| std::unique_ptr<TranslateRanker> ranker(new TranslateRanker()); |
| + std::unique_ptr<TranslateRankerModel> model(new TranslateRankerModel()); |
| CHECK(ranker != nullptr); |
| - ranker->ParseModel(0, true, model_data); |
| - CHECK(ranker->model_ != nullptr); |
| + CHECK(model != nullptr); |
| + CHECK(model->ParseFromString(model_data)); |
| + CHECK(IsCompatible(*model)); |
| + ranker->SetSharedModelPtr(std::move(model)); |
| return ranker; |
| } |
| +// Creates, configures and starts the model loader the first time it is |
| +// called. Calls made once |model_loader_| is set return without doing |
| +// anything. |
| +void TranslateRanker::StartModelLoader() { |
| + // Inspect |model_loader_| only while holding |lock_|. An unlocked pre-check |
| + // of this non-atomic pointer would race with the reset() below when two |
| + // threads call this method concurrently. |
| + base::AutoLock auto_lock(lock_); |
| + |
| + if (model_loader_) |
| + return; |
| + |
| + // NOTE(review): Start() runs while |lock_| is held and the model-available |
| + // callback (SetSharedModelPtr) acquires |lock_| as well; confirm the loader |
| + // never invokes that callback synchronously from Start(). |
| + // NOTE(review): base::Unretained(this) assumes the ranker outlives every |
| + // callback invocation made by the loader - confirm. |
| + model_loader_.reset(new TranslateRankerModelLoader()); |
| + model_loader_->set_cache_file_path(GetTranslateRankerModelPath()) |
| + .set_download_url(GetTranslateRankerModelURL()) |
| + .set_is_compatible_func(base::BindRepeating(&IsCompatible)) |
| + .set_is_up_to_date_func(base::BindRepeating(&IsUpToDate)) |
| + .set_on_model_available_func(base::BindRepeating( |
| + &TranslateRanker::SetSharedModelPtr, base::Unretained(this))) |
| + .Start(); |
| +} |
| + |
| bool TranslateRanker::ShouldOfferTranslation( |
| const TranslatePrefs& translate_prefs, |
| const std::string& src_lang, |
| @@ -129,13 +235,14 @@ bool TranslateRanker::ShouldOfferTranslation( |
| // (or become False). |
| const bool kDefaultResponse = true; |
| + ConstSharedModelPtr model = GetSharedModelPtr(); |
| + |
| - // If we don't have a model, request one and return the default. |
| + // If we don't have a model, return the default. |
| - if (model_ == nullptr) { |
| - FetchModelData(); |
| + if (model == nullptr) { |
| return kDefaultResponse; |
| } |
| - DCHECK(model_->has_logistic_regression_model()); |
| + DCHECK(IsCompatible(model->data)); |
| SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ShouldOfferTranslation"); |
| @@ -150,7 +257,7 @@ bool TranslateRanker::ShouldOfferTranslation( |
| double accept_count = translate_prefs.GetTranslationAcceptedCount(src_lang); |
| double denied_count = translate_prefs.GetTranslationDeniedCount(src_lang); |
| double ignored_count = |
| - model_->logistic_regression_model().has_ignore_ratio_weight() |
| + model->data.logistic_regression_model().has_ignore_ratio_weight() |
| ? translate_prefs.GetTranslationIgnoredCount(src_lang) |
| : 0.0; |
| double total_count = accept_count + denied_count + ignored_count; |
| @@ -171,8 +278,9 @@ bool TranslateRanker::ShouldOfferTranslation( |
| << ", decline_ratio=" << decline_ratio |
| << ", ignore_ratio=" << ignore_ratio << "]"; |
| - double score = CalculateScore(accept_ratio, decline_ratio, ignore_ratio, |
| - src_lang, dst_lang, app_locale, country); |
| + double score = |
| + CalculateScore(model->data, accept_ratio, decline_ratio, ignore_ratio, |
| + src_lang, dst_lang, app_locale, country); |
| DVLOG(2) << "TranslateRanker Score: " << score; |
| @@ -185,18 +293,18 @@ bool TranslateRanker::ShouldOfferTranslation( |
| TranslateRanker::TranslateRanker() {} |
| -double TranslateRanker::CalculateScore(double accept_ratio, |
| +double TranslateRanker::CalculateScore(const TranslateRankerModel& model, |
| + double accept_ratio, |
| double decline_ratio, |
| double ignore_ratio, |
| const std::string& src_lang, |
| const std::string& dst_lang, |
| const std::string& locale, |
| const std::string& country) { |
| + DCHECK(IsCompatible(model)); |
| SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.CalculateScore"); |
| - DCHECK(model_ != nullptr); |
| - DCHECK(model_->has_logistic_regression_model()); |
| const chrome_intelligence::TranslateRankerModel::LogisticRegressionModel& |
| - logit = model_->logistic_regression_model(); |
| + logit = model.logistic_regression_model(); |
| double dot_product = |
| (accept_ratio * logit.accept_ratio_weight()) + |
| (decline_ratio * logit.decline_ratio_weight()) + |
| @@ -208,87 +316,29 @@ double TranslateRanker::CalculateScore(double accept_ratio, |
| return Sigmoid(dot_product + logit.bias()); |
| } |
| -int TranslateRanker::GetModelVersion() const { |
| - return (model_ == nullptr) ? 0 : model_->version(); |
| +// Returns a copy of the current shared model pointer, taken while |lock_| is |
| +// held. The result is null when no model has been set. |
| +TranslateRanker::ConstSharedModelPtr TranslateRanker::GetSharedModelPtr() |
| + const { |
| + base::AutoLock scoped_lock(lock_); |
| + ConstSharedModelPtr snapshot = shared_model_ptr_; |
| + return snapshot; |
| } |
| -void TranslateRanker::FetchModelData() { |
| - // Exit if the model has already been successfully loaded. |
| - if (model_ != nullptr) { |
| - return; |
| - } |
| - |
| - // Exit if the download has been throttled. |
| - if (base::Time::NowFromSystemTime() < next_earliest_download_time_) { |
| - return; |
| - } |
| - |
| - // Create the model fetcher if it does not exist. |
| - if (model_fetcher_ == nullptr) { |
| - model_fetcher_.reset(new TranslateURLFetcher(kFetcherId)); |
| - model_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx); |
| - } |
| - |
| - // If a request is already in flight, do not issue a new one. |
| - if (model_fetcher_->state() == TranslateURLFetcher::REQUESTING) { |
| - DVLOG(2) << "TranslateRanker: Download complete or in progress."; |
| - return; |
| - } |
| - |
| - DVLOG(2) << "TranslateRanker: Downloading model..."; |
| +// Replaces the model handed out by GetSharedModelPtr() with the contents of |
| +// |new_model|. A non-null |new_model| must satisfy IsCompatible(). A null |
| +// |new_model| clears the current model. |
| +void TranslateRanker::SetSharedModelPtr( |
| + std::unique_ptr<TranslateRankerModel> new_model) { |
| + DCHECK(!new_model || IsCompatible(*new_model)); |
| - download_start_time_ = base::Time::Now(); |
| - bool result = model_fetcher_->Request( |
| - GetTranslateRankerURL(), |
| - base::Bind(&TranslateRanker::ParseModel, base::Unretained(this))); |
| + // Build the replacement before taking the lock. The DCHECK above permits a |
| + // null |new_model|, so it must not be handed to Swap(), which would |
| + // dereference it; a null input yields a null shared pointer instead. |
| + // NOTE(review): confirm callers expect null to clear the model rather than |
| + // keep the previous one. |
| + SharedModelPtr new_shared_model_ptr; |
| + if (new_model) { |
| + new_shared_model_ptr = |
| + SharedModelPtr(new base::RefCountedData<TranslateRankerModel>()); |
| + new_shared_model_ptr->data.Swap(new_model.get()); |
| + } |
| - if (!result) { |
| - ReportModelStatus(MODEL_STATUS_DOWNLOAD_THROTTLED); |
| - next_earliest_download_time_ = |
| - base::Time::NowFromSystemTime() + |
| - base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin); |
| - } |
| + // Grab the lock and update the shared model pointer. |
| + base::AutoLock auto_lock(lock_); |
| + shared_model_ptr_ = new_shared_model_ptr; |
| } |
| -void TranslateRanker::ParseModel(int /* id */, |
| - bool success, |
| - const std::string& data) { |
| - UMA_HISTOGRAM_MEDIUM_TIMES("Translate.Ranker.Timer.DownloadModel", |
| - base::Time::Now() - download_start_time_); |
| - |
| - SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ParseModel"); |
| - |
| - // We should not be here if the model has already been downloaded and parsed. |
| - DCHECK(model_ == nullptr); |
| - |
| - // On failure, we just abort. The TranslateRanker will retry on a subsequent |
| - // translation opportunity. The TranslateURLFetcher enforces a limit for |
| - // retried requests. |
| - if (!success) { |
| - ReportModelStatus(MODEL_STATUS_DOWNLOAD_FAILED); |
| - return; |
| - } |
| - |
| - // Create a new model instance, parse and validate the data, and move it over |
| - // to be used by the ranker. |
| - std::unique_ptr<chrome_intelligence::TranslateRankerModel> new_model( |
| - new chrome_intelligence::TranslateRankerModel()); |
| - |
| - bool is_parseable = new_model->ParseFromString(data); |
| - if (!is_parseable) { |
| - ReportModelStatus(MODEL_STATUS_PARSE_FAILED); |
| - return; |
| - } |
| - |
| - bool is_valid = new_model->has_logistic_regression_model(); |
| - if (!is_valid) { |
| - ReportModelStatus(MODEL_STATUS_VALIDATION_FAILED); |
| - return; |
| - } |
| - |
| - ReportModelStatus(MODEL_STATUS_OK); |
| - model_ = std::move(new_model); |
| - model_fetcher_.reset(); |
| +// Returns the version of the currently set model, or 0 when no model is set. |
| +// The shared model pointer is read while |lock_| is held. |
| +int TranslateRanker::GetModelVersion() const { |
| + base::AutoLock scoped_lock(lock_); |
| + if (shared_model_ptr_ == nullptr) |
| + return 0; |
| + return shared_model_ptr_->data.version(); |
| } |
| void TranslateRanker::FlushTranslateEvents( |