Index: components/translate/core/browser/translate_ranker.cc |
diff --git a/components/translate/core/browser/translate_ranker.cc b/components/translate/core/browser/translate_ranker.cc |
index fe2a214ee397936d1210379f595bfeac9c99b616..19b161e0e85ecc5d33509a6cd0266642ddc01208 100644 |
--- a/components/translate/core/browser/translate_ranker.cc |
+++ b/components/translate/core/browser/translate_ranker.cc |
@@ -9,44 +9,50 @@ |
#include "base/bind.h" |
#include "base/bind_helpers.h" |
#include "base/command_line.h" |
+#include "base/files/file_path.h" |
+#include "base/files/file_util.h" |
#include "base/metrics/histogram_macros.h" |
#include "base/profiler/scoped_tracker.h" |
+#include "base/strings/string_number_conversions.h" |
#include "base/strings/string_util.h" |
#include "components/metrics/proto/translate_event.pb.h" |
#include "components/translate/core/browser/proto/translate_ranker_model.pb.h" |
#include "components/translate/core/browser/translate_download_manager.h" |
#include "components/translate/core/browser/translate_prefs.h" |
+#include "components/translate/core/browser/translate_ranker_model_loader.h" |
#include "components/translate/core/browser/translate_url_fetcher.h" |
#include "components/translate/core/common/translate_switches.h" |
+#include "components/variations/variations_associated_data.h" |
+ |
+#if defined(OS_WIN) |
+#include "base/base_paths_win.h" |
+#include "base/path_service.h" |
+#include "base/strings/utf_string_conversions.h" |
+#elif defined(OS_ANDROID) |
+#include "base/base_paths_android.h" |
+#include "base/path_service.h" |
+#elif defined(OS_LINUX) |
+#include "base/environment.h" |
+#include "base/nix/xdg_util.h" |
+#elif defined(OS_MACOSX) |
+#include "base/base_paths_mac.h" |
+#include "base/path_service.h" |
+#endif |
namespace translate { |
namespace { |
+using chrome_intelligence::TranslateRankerModel; |
+ |
+const char kUserDataDirSwitch[] = "user-data-dir"; |
pasko
2016/12/19 14:26:49
this looks like the wrong layer to do it. kUserDat
Roger McFarlane (Chromium)
2017/02/08 23:08:07
I've turned the ranker into a KeyedService and con
|
+const char kTranslateRankerModelFileName[] = "Translate Ranker Model"; |
typedef google::protobuf::Map<std::string, float> WeightMap; |
const double kTranslationOfferThreshold = 0.5; |
-// Parameters for model fetching. |
-const char kTranslateRankerModelURL[] = |
- "https://chromium-i18n.appspot.com/ssl-translate-ranker-model"; |
-const int kMaxRetryOn5xx = 3; |
-const int kDownloadRefractoryPeriodMin = 15; |
const char kUnknown[] = "UNKNOWN"; |
-// Enumeration denoting the outcome of an attempt to download the translate |
-// ranker model. This must be kept in sync with the TranslateRankerModelStatus |
-// enum in histograms.xml |
-enum ModelStatus { |
- MODEL_STATUS_OK = 0, |
- MODEL_STATUS_DOWNLOAD_THROTTLED = 1, |
- MODEL_STATUS_DOWNLOAD_FAILED = 2, |
- MODEL_STATUS_PARSE_FAILED = 3, |
- MODEL_STATUS_VALIDATION_FAILED = 4, |
- // Insert new values above this line. |
- MODEL_STATUS_MAX |
-}; |
- |
double Sigmoid(double x) { |
return 1.0 / (1.0 + exp(-x)); |
} |
@@ -58,17 +64,94 @@ double ScoreComponent(const WeightMap& weights, const std::string& key) { |
return i == weights.end() ? 0.0 : i->second; |
} |
-GURL GetTranslateRankerURL() { |
+GURL GetTranslateRankerModelURL() { |
+ // Allow override of the ranker model URL from the command line. |
+ base::CommandLine* command_line = base::CommandLine::ForCurrentProcess(); |
+ if (command_line->HasSwitch(switches::kTranslateRankerModelURL)) { |
+ return GURL( |
+ command_line->GetSwitchValueASCII(switches::kTranslateRankerModelURL)); |
+ } |
+ |
+ // Otherwise take the ranker model URL from the ranker query variation. |
+ const std::string raw_url = variations::GetVariationParamValueByFeature( |
+ kTranslateRankerQuery, switches::kTranslateRankerModelURL); |
+ |
+ DVLOG(3) << switches::kTranslateRankerModelURL << " = " << raw_url; |
+ |
+ return GURL(raw_url); |
+} |
+ |
+base::FilePath GetUserDataDir() { |
+ base::FilePath path; |
base::CommandLine* command_line = base::CommandLine::ForCurrentProcess(); |
- return GURL(command_line->HasSwitch(switches::kTranslateRankerModelURL) |
- ? command_line->GetSwitchValueASCII( |
- switches::kTranslateRankerModelURL) |
- : kTranslateRankerModelURL); |
+ if (command_line->HasSwitch(kUserDataDirSwitch)) { |
+ path = command_line->GetSwitchValuePath(kUserDataDirSwitch); |
+ } else { |
+#if defined(OS_WIN) |
+ CHECK(PathService::Get(base::DIR_LOCAL_APP_DATA, &path)); |
+#elif defined(OS_MACOSX) |
+ CHECK(PathService::Get(base::DIR_APP_DATA, &path)); |
+#elif defined(OS_ANDROID) |
+ CHECK(PathService::Get(base::DIR_ANDROID_APP_DATA, &path)); |
+#elif defined(OS_LINUX) |
+ std::unique_ptr<base::Environment> env(base::Environment::Create()); |
+ path = base::nix::GetXDGDirectory( |
+ env.get(), base::nix::kXdgConfigHomeEnvVar, base::nix::kDotConfigDir); |
+#else |
+ NOTIMPLEMENTED(); |
+#endif |
+ } |
+ return path; |
} |
-void ReportModelStatus(ModelStatus model_status) { |
- UMA_HISTOGRAM_ENUMERATION("Translate.Ranker.Model.Status", model_status, |
- MODEL_STATUS_MAX); |
+base::FilePath GetTranslateRankerModelPath() { |
+ // Allow override of the ranker model path from the command line. |
+ base::CommandLine* command_line = base::CommandLine::ForCurrentProcess(); |
+ if (command_line->HasSwitch(switches::kTranslateRankerModelPath)) { |
+ return base::FilePath(command_line->GetSwitchValueNative( |
+ switches::kTranslateRankerModelPath)); |
+ } |
+ |
+ // Otherwise, look for the file in the top-level user data dir. |
+ return GetUserDataDir().AppendASCII(kTranslateRankerModelFileName); |
+} |
+ |
+bool IsUpToDate(const TranslateRankerModel& model) { |
+ base::CommandLine* command_line = base::CommandLine::ForCurrentProcess(); |
+ |
+ // When forcibly loading a model from disk, assume it's up-to-date to skip |
+ // downloading a new model over it. |
+ if (command_line->HasSwitch(switches::kTranslateRankerModelPath)) |
+ return true; |
+ |
+  // When forcibly downloading a model, assume that any model loaded from disk |
+ // is out-of-date. |
+ if (command_line->HasSwitch(switches::kTranslateRankerModelURL)) |
+ return false; |
+ |
+  // Otherwise take the expected ranker model version from the ranker query |
+  // variation. |
+ const std::string& expected_version_str = |
+ variations::GetVariationParamValueByFeature( |
+ kTranslateRankerQuery, "translate-ranker-model-version"); |
+ |
+ DVLOG(2) << "Expected version = '" << expected_version_str << "'"; |
+ DVLOG(2) << "Received version = '" << model.version() << "'"; |
+ |
+ unsigned expected_version; |
+ return base::StringToUint(expected_version_str, &expected_version) && |
+ model.has_version() && model.version() == expected_version; |
+} |
+ |
+bool IsCompatible(const chrome_intelligence::TranslateRankerModel& model) { |
+ if (!model.has_logistic_regression_model()) |
+ return false; |
+ |
+ const TranslateRankerModel::LogisticRegressionModel& logit = |
+ model.logistic_regression_model(); |
+ |
+ return logit.has_bias() && logit.has_accept_ratio_weight() && |
+ logit.has_decline_ratio_weight(); |
} |
} // namespace |
@@ -109,15 +192,38 @@ TranslateRanker* TranslateRanker::GetInstance() { |
return base::Singleton<TranslateRanker>::get(); |
} |
+// static |
std::unique_ptr<TranslateRanker> TranslateRanker::CreateForTesting( |
const std::string& model_data) { |
std::unique_ptr<TranslateRanker> ranker(new TranslateRanker()); |
+ std::unique_ptr<TranslateRankerModel> model(new TranslateRankerModel()); |
CHECK(ranker != nullptr); |
- ranker->ParseModel(0, true, model_data); |
- CHECK(ranker->model_ != nullptr); |
+ CHECK(model != nullptr); |
+ CHECK(model->ParseFromString(model_data)); |
+ CHECK(IsCompatible(*model)); |
+ ranker->SetSharedModelPtr(std::move(model)); |
return ranker; |
} |
+void TranslateRanker::StartModelLoader() { |
+ if (model_loader_) |
+ return; |
+ |
+ base::AutoLock auto_lock(lock_); |
+ |
+ if (model_loader_) |
+ return; |
+ |
+ model_loader_.reset(new TranslateRankerModelLoader()); |
+ model_loader_->set_cache_file_path(GetTranslateRankerModelPath()) |
+ .set_download_url(GetTranslateRankerModelURL()) |
+ .set_is_compatible_func(base::BindRepeating(&IsCompatible)) |
+ .set_is_up_to_date_func(base::BindRepeating(&IsUpToDate)) |
+ .set_on_model_available_func(base::BindRepeating( |
+ &TranslateRanker::SetSharedModelPtr, base::Unretained(this))) |
+ .Start(); |
+} |
+ |
bool TranslateRanker::ShouldOfferTranslation( |
const TranslatePrefs& translate_prefs, |
const std::string& src_lang, |
@@ -129,13 +235,14 @@ bool TranslateRanker::ShouldOfferTranslation( |
// (or become False). |
const bool kDefaultResponse = true; |
+ ConstSharedModelPtr model = GetSharedModelPtr(); |
+ |
// If we don't have a model, request one and return the default. |
- if (model_ == nullptr) { |
- FetchModelData(); |
+ if (model == nullptr) { |
return kDefaultResponse; |
} |
- DCHECK(model_->has_logistic_regression_model()); |
+ DCHECK(IsCompatible(model->data)); |
SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ShouldOfferTranslation"); |
@@ -150,7 +257,7 @@ bool TranslateRanker::ShouldOfferTranslation( |
double accept_count = translate_prefs.GetTranslationAcceptedCount(src_lang); |
double denied_count = translate_prefs.GetTranslationDeniedCount(src_lang); |
double ignored_count = |
- model_->logistic_regression_model().has_ignore_ratio_weight() |
+ model->data.logistic_regression_model().has_ignore_ratio_weight() |
? translate_prefs.GetTranslationIgnoredCount(src_lang) |
: 0.0; |
double total_count = accept_count + denied_count + ignored_count; |
@@ -171,8 +278,9 @@ bool TranslateRanker::ShouldOfferTranslation( |
<< ", decline_ratio=" << decline_ratio |
<< ", ignore_ratio=" << ignore_ratio << "]"; |
- double score = CalculateScore(accept_ratio, decline_ratio, ignore_ratio, |
- src_lang, dst_lang, app_locale, country); |
+ double score = |
+ CalculateScore(model->data, accept_ratio, decline_ratio, ignore_ratio, |
+ src_lang, dst_lang, app_locale, country); |
DVLOG(2) << "TranslateRanker Score: " << score; |
@@ -185,18 +293,18 @@ bool TranslateRanker::ShouldOfferTranslation( |
TranslateRanker::TranslateRanker() {} |
-double TranslateRanker::CalculateScore(double accept_ratio, |
+double TranslateRanker::CalculateScore(const TranslateRankerModel& model, |
+ double accept_ratio, |
double decline_ratio, |
double ignore_ratio, |
const std::string& src_lang, |
const std::string& dst_lang, |
const std::string& locale, |
const std::string& country) { |
+ DCHECK(IsCompatible(model)); |
SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.CalculateScore"); |
- DCHECK(model_ != nullptr); |
- DCHECK(model_->has_logistic_regression_model()); |
const chrome_intelligence::TranslateRankerModel::LogisticRegressionModel& |
- logit = model_->logistic_regression_model(); |
+ logit = model.logistic_regression_model(); |
double dot_product = |
(accept_ratio * logit.accept_ratio_weight()) + |
(decline_ratio * logit.decline_ratio_weight()) + |
@@ -208,87 +316,29 @@ double TranslateRanker::CalculateScore(double accept_ratio, |
return Sigmoid(dot_product + logit.bias()); |
} |
-int TranslateRanker::GetModelVersion() const { |
- return (model_ == nullptr) ? 0 : model_->version(); |
+TranslateRanker::ConstSharedModelPtr TranslateRanker::GetSharedModelPtr() |
+ const { |
+ base::AutoLock auto_lock(lock_); |
+ return shared_model_ptr_; |
} |
-void TranslateRanker::FetchModelData() { |
- // Exit if the model has already been successfully loaded. |
- if (model_ != nullptr) { |
- return; |
- } |
- |
- // Exit if the download has been throttled. |
- if (base::Time::NowFromSystemTime() < next_earliest_download_time_) { |
- return; |
- } |
- |
- // Create the model fetcher if it does not exist. |
- if (model_fetcher_ == nullptr) { |
- model_fetcher_.reset(new TranslateURLFetcher(kFetcherId)); |
- model_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx); |
- } |
- |
- // If a request is already in flight, do not issue a new one. |
- if (model_fetcher_->state() == TranslateURLFetcher::REQUESTING) { |
- DVLOG(2) << "TranslateRanker: Download complete or in progress."; |
- return; |
- } |
- |
- DVLOG(2) << "TranslateRanker: Downloading model..."; |
+void TranslateRanker::SetSharedModelPtr( |
+ std::unique_ptr<TranslateRankerModel> new_model) { |
+ DCHECK(!new_model || IsCompatible(*new_model)); |
- download_start_time_ = base::Time::Now(); |
- bool result = model_fetcher_->Request( |
- GetTranslateRankerURL(), |
- base::Bind(&TranslateRanker::ParseModel, base::Unretained(this))); |
+ // Create a new shared model instance and swap the model contents into it. |
+ SharedModelPtr new_shared_model_ptr( |
+ new base::RefCountedData<TranslateRankerModel>()); |
+ new_shared_model_ptr->data.Swap(new_model.get()); |
- if (!result) { |
- ReportModelStatus(MODEL_STATUS_DOWNLOAD_THROTTLED); |
- next_earliest_download_time_ = |
- base::Time::NowFromSystemTime() + |
- base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin); |
- } |
+ // Grab the lock and update the shared model pointer. |
+ base::AutoLock auto_lock(lock_); |
+ shared_model_ptr_ = new_shared_model_ptr; |
} |
-void TranslateRanker::ParseModel(int /* id */, |
- bool success, |
- const std::string& data) { |
- UMA_HISTOGRAM_MEDIUM_TIMES("Translate.Ranker.Timer.DownloadModel", |
- base::Time::Now() - download_start_time_); |
- |
- SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ParseModel"); |
- |
- // We should not be here if the model has already been downloaded and parsed. |
- DCHECK(model_ == nullptr); |
- |
- // On failure, we just abort. The TranslateRanker will retry on a subsequent |
- // translation opportunity. The TranslateURLFetcher enforces a limit for |
- // retried requests. |
- if (!success) { |
- ReportModelStatus(MODEL_STATUS_DOWNLOAD_FAILED); |
- return; |
- } |
- |
- // Create a new model instance, parse and validate the data, and move it over |
- // to be used by the ranker. |
- std::unique_ptr<chrome_intelligence::TranslateRankerModel> new_model( |
- new chrome_intelligence::TranslateRankerModel()); |
- |
- bool is_parseable = new_model->ParseFromString(data); |
- if (!is_parseable) { |
- ReportModelStatus(MODEL_STATUS_PARSE_FAILED); |
- return; |
- } |
- |
- bool is_valid = new_model->has_logistic_regression_model(); |
- if (!is_valid) { |
- ReportModelStatus(MODEL_STATUS_VALIDATION_FAILED); |
- return; |
- } |
- |
- ReportModelStatus(MODEL_STATUS_OK); |
- model_ = std::move(new_model); |
- model_fetcher_.reset(); |
+int TranslateRanker::GetModelVersion() const { |
+ base::AutoLock auto_lock(lock_); |
+ return (shared_model_ptr_ == nullptr) ? 0 : shared_model_ptr_->data.version(); |
} |
void TranslateRanker::FlushTranslateEvents( |