Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(139)

Unified Diff: components/translate/core/browser/translate_ranker.cc

Issue 2565873002: [translate] Add translate ranker model loader. (Closed)
Patch Set: Initial CL Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/translate/core/browser/translate_ranker.cc
diff --git a/components/translate/core/browser/translate_ranker.cc b/components/translate/core/browser/translate_ranker.cc
index fe2a214ee397936d1210379f595bfeac9c99b616..19b161e0e85ecc5d33509a6cd0266642ddc01208 100644
--- a/components/translate/core/browser/translate_ranker.cc
+++ b/components/translate/core/browser/translate_ranker.cc
@@ -9,44 +9,50 @@
#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/command_line.h"
+#include "base/files/file_path.h"
+#include "base/files/file_util.h"
#include "base/metrics/histogram_macros.h"
#include "base/profiler/scoped_tracker.h"
+#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "components/metrics/proto/translate_event.pb.h"
#include "components/translate/core/browser/proto/translate_ranker_model.pb.h"
#include "components/translate/core/browser/translate_download_manager.h"
#include "components/translate/core/browser/translate_prefs.h"
+#include "components/translate/core/browser/translate_ranker_model_loader.h"
#include "components/translate/core/browser/translate_url_fetcher.h"
#include "components/translate/core/common/translate_switches.h"
+#include "components/variations/variations_associated_data.h"
+
+#if defined(OS_WIN)
+#include "base/base_paths_win.h"
+#include "base/path_service.h"
+#include "base/strings/utf_string_conversions.h"
+#elif defined(OS_ANDROID)
+#include "base/base_paths_android.h"
+#include "base/path_service.h"
+#elif defined(OS_LINUX)
+#include "base/environment.h"
+#include "base/nix/xdg_util.h"
+#elif defined(OS_MACOSX)
+#include "base/base_paths_mac.h"
+#include "base/path_service.h"
+#endif
namespace translate {
namespace {
+using chrome_intelligence::TranslateRankerModel;
+
+const char kUserDataDirSwitch[] = "user-data-dir";
pasko 2016/12/19 14:26:49 this looks like the wrong layer to do it. kUserDat
Roger McFarlane (Chromium) 2017/02/08 23:08:07 I've turned the ranker into a KeyedService and con
+const char kTranslateRankerModelFileName[] = "Translate Ranker Model";
typedef google::protobuf::Map<std::string, float> WeightMap;
const double kTranslationOfferThreshold = 0.5;
-// Parameters for model fetching.
-const char kTranslateRankerModelURL[] =
- "https://chromium-i18n.appspot.com/ssl-translate-ranker-model";
-const int kMaxRetryOn5xx = 3;
-const int kDownloadRefractoryPeriodMin = 15;
const char kUnknown[] = "UNKNOWN";
-// Enumeration denoting the outcome of an attempt to download the translate
-// ranker model. This must be kept in sync with the TranslateRankerModelStatus
-// enum in histograms.xml
-enum ModelStatus {
- MODEL_STATUS_OK = 0,
- MODEL_STATUS_DOWNLOAD_THROTTLED = 1,
- MODEL_STATUS_DOWNLOAD_FAILED = 2,
- MODEL_STATUS_PARSE_FAILED = 3,
- MODEL_STATUS_VALIDATION_FAILED = 4,
- // Insert new values above this line.
- MODEL_STATUS_MAX
-};
-
double Sigmoid(double x) {
return 1.0 / (1.0 + exp(-x));
}
@@ -58,17 +64,94 @@ double ScoreComponent(const WeightMap& weights, const std::string& key) {
return i == weights.end() ? 0.0 : i->second;
}
-GURL GetTranslateRankerURL() {
+// Resolves the URL from which the translate ranker model is fetched. A value
+// supplied through the switches::kTranslateRankerModelURL command-line switch
+// wins; otherwise the URL is read from the parameter of the same name on the
+// kTranslateRankerQuery variation.
+GURL GetTranslateRankerModelURL() {
+  base::CommandLine* cmd_line = base::CommandLine::ForCurrentProcess();
+  const bool has_override =
+      cmd_line->HasSwitch(switches::kTranslateRankerModelURL);
+
+  if (!has_override) {
+    // No command-line override: consult the ranker query variation.
+    const std::string variation_url =
+        variations::GetVariationParamValueByFeature(
+            kTranslateRankerQuery, switches::kTranslateRankerModelURL);
+
+    DVLOG(3) << switches::kTranslateRankerModelURL << " = " << variation_url;
+
+    return GURL(variation_url);
+  }
+
+  // The command line explicitly names the model URL.
+  return GURL(
+      cmd_line->GetSwitchValueASCII(switches::kTranslateRankerModelURL));
+}
+
+// Returns the directory used as the top-level user data dir. The value of the
+// kUserDataDirSwitch command-line switch is used when present. Otherwise a
+// per-platform location is queried: DIR_LOCAL_APP_DATA on Windows,
+// DIR_APP_DATA on macOS, DIR_ANDROID_APP_DATA on Android and the XDG config
+// home on Linux. On any other platform NOTIMPLEMENTED() is hit and the
+// default-constructed path is returned.
+base::FilePath GetUserDataDir() {
base::CommandLine* command_line = base::CommandLine::ForCurrentProcess();
- return GURL(command_line->HasSwitch(switches::kTranslateRankerModelURL)
- ? command_line->GetSwitchValueASCII(
- switches::kTranslateRankerModelURL)
- : kTranslateRankerModelURL);
+  // An explicit command-line override takes precedence over platform defaults.
+  if (command_line->HasSwitch(kUserDataDirSwitch))
+    return command_line->GetSwitchValuePath(kUserDataDirSwitch);
+
+  base::FilePath path;
+#if defined(OS_WIN)
+  CHECK(PathService::Get(base::DIR_LOCAL_APP_DATA, &path));
+#elif defined(OS_MACOSX)
+  CHECK(PathService::Get(base::DIR_APP_DATA, &path));
+#elif defined(OS_ANDROID)
+  CHECK(PathService::Get(base::DIR_ANDROID_APP_DATA, &path));
+#elif defined(OS_LINUX)
+  std::unique_ptr<base::Environment> env(base::Environment::Create());
+  path = base::nix::GetXDGDirectory(
+      env.get(), base::nix::kXdgConfigHomeEnvVar, base::nix::kDotConfigDir);
+#else
+  NOTIMPLEMENTED();
+#endif
+  return path;
}
-void ReportModelStatus(ModelStatus model_status) {
- UMA_HISTOGRAM_ENUMERATION("Translate.Ranker.Model.Status", model_status,
- MODEL_STATUS_MAX);
+// Returns where the ranker model file lives on disk: the value of the
+// switches::kTranslateRankerModelPath command-line switch when it is present,
+// otherwise kTranslateRankerModelFileName inside GetUserDataDir().
+base::FilePath GetTranslateRankerModelPath() {
+  base::CommandLine* cmd_line = base::CommandLine::ForCurrentProcess();
+  const bool has_override =
+      cmd_line->HasSwitch(switches::kTranslateRankerModelPath);
+
+  return has_override
+             ? base::FilePath(cmd_line->GetSwitchValueNative(
+                   switches::kTranslateRankerModelPath))
+             : GetUserDataDir().AppendASCII(kTranslateRankerModelFileName);
+}
+
+// Decides whether |model| can be used as-is instead of downloading a
+// replacement:
+//  - switches::kTranslateRankerModelPath present: always up to date.
+//  - switches::kTranslateRankerModelURL present: never up to date.
+//  - otherwise: up to date only when the "translate-ranker-model-version"
+//    variation parameter parses as an unsigned integer and equals the
+//    version recorded in |model|.
+bool IsUpToDate(const TranslateRankerModel& model) {
+  base::CommandLine* cmd_line = base::CommandLine::ForCurrentProcess();
+
+  // When forcibly loading a model from disk, treat it as up to date so that no
+  // new model is downloaded over it.
+  if (cmd_line->HasSwitch(switches::kTranslateRankerModelPath))
+    return true;
+
+  // When forcibly downloading a model, treat any model loaded from disk as
+  // out of date.
+  if (cmd_line->HasSwitch(switches::kTranslateRankerModelURL))
+    return false;
+
+  // Otherwise take the expected ranker model version from the ranker query
+  // variation.
+  const std::string wanted_version_str =
+      variations::GetVariationParamValueByFeature(
+          kTranslateRankerQuery, "translate-ranker-model-version");
+
+  DVLOG(2) << "Expected version = '" << wanted_version_str << "'";
+  DVLOG(2) << "Received version = '" << model.version() << "'";
+
+  // An unparsable expected version never matches.
+  unsigned wanted_version;
+  if (!base::StringToUint(wanted_version_str, &wanted_version))
+    return false;
+
+  return model.has_version() && model.version() == wanted_version;
+}
+
+// Returns true if |model| contains a logistic regression model that has its
+// bias, accept-ratio weight and decline-ratio weight fields set.
+bool IsCompatible(const chrome_intelligence::TranslateRankerModel& model) {
+  if (model.has_logistic_regression_model()) {
+    const TranslateRankerModel::LogisticRegressionModel& regression =
+        model.logistic_regression_model();
+
+    return regression.has_bias() && regression.has_accept_ratio_weight() &&
+           regression.has_decline_ratio_weight();
+  }
+
+  return false;
}
} // namespace
@@ -109,15 +192,38 @@ TranslateRanker* TranslateRanker::GetInstance() {
return base::Singleton<TranslateRanker>::get();
}
+// static
std::unique_ptr<TranslateRanker> TranslateRanker::CreateForTesting(
const std::string& model_data) {
std::unique_ptr<TranslateRanker> ranker(new TranslateRanker());
+ std::unique_ptr<TranslateRankerModel> model(new TranslateRankerModel());
CHECK(ranker != nullptr);
- ranker->ParseModel(0, true, model_data);
- CHECK(ranker->model_ != nullptr);
+ CHECK(model != nullptr);
+ CHECK(model->ParseFromString(model_data));
+ CHECK(IsCompatible(*model));
+ ranker->SetSharedModelPtr(std::move(model));
return ranker;
}
+// Creates, configures and starts the model loader the first time it is called;
+// subsequent calls return without doing anything. The loader is given the
+// cache file path, the download URL, the IsCompatible/IsUpToDate predicates
+// and a callback that hands a newly available model to SetSharedModelPtr().
+void TranslateRanker::StartModelLoader() {
+  // Inspect |model_loader_| only while holding |lock_|. An unlocked pre-check
+  // would read the pointer concurrently with the reset() below, which is a
+  // data race.
+  base::AutoLock auto_lock(lock_);
+
+  if (model_loader_)
+    return;
+
+  // NOTE(review): Start() is invoked with |lock_| held and SetSharedModelPtr()
+  // also acquires |lock_|; this presumably relies on the loader delivering the
+  // model asynchronously - confirm against TranslateRankerModelLoader.
+  model_loader_.reset(new TranslateRankerModelLoader());
+  model_loader_->set_cache_file_path(GetTranslateRankerModelPath())
+      .set_download_url(GetTranslateRankerModelURL())
+      .set_is_compatible_func(base::BindRepeating(&IsCompatible))
+      .set_is_up_to_date_func(base::BindRepeating(&IsUpToDate))
+      .set_on_model_available_func(base::BindRepeating(
+          &TranslateRanker::SetSharedModelPtr, base::Unretained(this)))
+      .Start();
+}
+
bool TranslateRanker::ShouldOfferTranslation(
const TranslatePrefs& translate_prefs,
const std::string& src_lang,
@@ -129,13 +235,14 @@ bool TranslateRanker::ShouldOfferTranslation(
// (or become False).
const bool kDefaultResponse = true;
+ ConstSharedModelPtr model = GetSharedModelPtr();
+
// If we don't have a model, request one and return the default.
- if (model_ == nullptr) {
- FetchModelData();
+ if (model == nullptr) {
return kDefaultResponse;
}
- DCHECK(model_->has_logistic_regression_model());
+ DCHECK(IsCompatible(model->data));
SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ShouldOfferTranslation");
@@ -150,7 +257,7 @@ bool TranslateRanker::ShouldOfferTranslation(
double accept_count = translate_prefs.GetTranslationAcceptedCount(src_lang);
double denied_count = translate_prefs.GetTranslationDeniedCount(src_lang);
double ignored_count =
- model_->logistic_regression_model().has_ignore_ratio_weight()
+ model->data.logistic_regression_model().has_ignore_ratio_weight()
? translate_prefs.GetTranslationIgnoredCount(src_lang)
: 0.0;
double total_count = accept_count + denied_count + ignored_count;
@@ -171,8 +278,9 @@ bool TranslateRanker::ShouldOfferTranslation(
<< ", decline_ratio=" << decline_ratio
<< ", ignore_ratio=" << ignore_ratio << "]";
- double score = CalculateScore(accept_ratio, decline_ratio, ignore_ratio,
- src_lang, dst_lang, app_locale, country);
+ double score =
+ CalculateScore(model->data, accept_ratio, decline_ratio, ignore_ratio,
+ src_lang, dst_lang, app_locale, country);
DVLOG(2) << "TranslateRanker Score: " << score;
@@ -185,18 +293,18 @@ bool TranslateRanker::ShouldOfferTranslation(
TranslateRanker::TranslateRanker() {}
-double TranslateRanker::CalculateScore(double accept_ratio,
+double TranslateRanker::CalculateScore(const TranslateRankerModel& model,
+ double accept_ratio,
double decline_ratio,
double ignore_ratio,
const std::string& src_lang,
const std::string& dst_lang,
const std::string& locale,
const std::string& country) {
+ DCHECK(IsCompatible(model));
SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.CalculateScore");
- DCHECK(model_ != nullptr);
- DCHECK(model_->has_logistic_regression_model());
const chrome_intelligence::TranslateRankerModel::LogisticRegressionModel&
- logit = model_->logistic_regression_model();
+ logit = model.logistic_regression_model();
double dot_product =
(accept_ratio * logit.accept_ratio_weight()) +
(decline_ratio * logit.decline_ratio_weight()) +
@@ -208,87 +316,29 @@ double TranslateRanker::CalculateScore(double accept_ratio,
return Sigmoid(dot_product + logit.bias());
}
-int TranslateRanker::GetModelVersion() const {
- return (model_ == nullptr) ? 0 : model_->version();
+// Returns a copy of the current shared model pointer, taken while holding
+// |lock_|. The result is null when no model has been set.
+TranslateRanker::ConstSharedModelPtr TranslateRanker::GetSharedModelPtr()
+    const {
+  ConstSharedModelPtr snapshot;
+  {
+    // Hold the lock only for the duration of the pointer copy.
+    base::AutoLock auto_lock(lock_);
+    snapshot = shared_model_ptr_;
+  }
+  return snapshot;
}
-void TranslateRanker::FetchModelData() {
- // Exit if the model has already been successfully loaded.
- if (model_ != nullptr) {
- return;
- }
-
- // Exit if the download has been throttled.
- if (base::Time::NowFromSystemTime() < next_earliest_download_time_) {
- return;
- }
-
- // Create the model fetcher if it does not exist.
- if (model_fetcher_ == nullptr) {
- model_fetcher_.reset(new TranslateURLFetcher(kFetcherId));
- model_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx);
- }
-
- // If a request is already in flight, do not issue a new one.
- if (model_fetcher_->state() == TranslateURLFetcher::REQUESTING) {
- DVLOG(2) << "TranslateRanker: Download complete or in progress.";
- return;
- }
-
- DVLOG(2) << "TranslateRanker: Downloading model...";
+// Replaces the model handed out by GetSharedModelPtr() with |new_model|. A
+// non-null |new_model| must satisfy IsCompatible(); its contents are swapped
+// into a freshly allocated ref-counted holder. A null |new_model| publishes a
+// null shared model (assuming SharedModelPtr default-constructs to null, as a
+// ref-counted pointer does).
+void TranslateRanker::SetSharedModelPtr(
+    std::unique_ptr<TranslateRankerModel> new_model) {
+  DCHECK(!new_model || IsCompatible(*new_model));
- download_start_time_ = base::Time::Now();
- bool result = model_fetcher_->Request(
- GetTranslateRankerURL(),
- base::Bind(&TranslateRanker::ParseModel, base::Unretained(this)));
+  // Create a new shared model instance and swap the model contents into it.
+  // The DCHECK above tolerates a null |new_model|, and Swap() must not be
+  // handed a null pointer, so only build the holder when there is a model.
+  SharedModelPtr new_shared_model_ptr;
+  if (new_model) {
+    new_shared_model_ptr =
+        SharedModelPtr(new base::RefCountedData<TranslateRankerModel>());
+    new_shared_model_ptr->data.Swap(new_model.get());
+  }
- if (!result) {
- ReportModelStatus(MODEL_STATUS_DOWNLOAD_THROTTLED);
- next_earliest_download_time_ =
- base::Time::NowFromSystemTime() +
- base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin);
- }
+  // Grab the lock and update the shared model pointer.
+  base::AutoLock auto_lock(lock_);
+  shared_model_ptr_ = new_shared_model_ptr;
}
-void TranslateRanker::ParseModel(int /* id */,
- bool success,
- const std::string& data) {
- UMA_HISTOGRAM_MEDIUM_TIMES("Translate.Ranker.Timer.DownloadModel",
- base::Time::Now() - download_start_time_);
-
- SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ParseModel");
-
- // We should not be here if the model has already been downloaded and parsed.
- DCHECK(model_ == nullptr);
-
- // On failure, we just abort. The TranslateRanker will retry on a subsequent
- // translation opportunity. The TranslateURLFetcher enforces a limit for
- // retried requests.
- if (!success) {
- ReportModelStatus(MODEL_STATUS_DOWNLOAD_FAILED);
- return;
- }
-
- // Create a new model instance, parse and validate the data, and move it over
- // to be used by the ranker.
- std::unique_ptr<chrome_intelligence::TranslateRankerModel> new_model(
- new chrome_intelligence::TranslateRankerModel());
-
- bool is_parseable = new_model->ParseFromString(data);
- if (!is_parseable) {
- ReportModelStatus(MODEL_STATUS_PARSE_FAILED);
- return;
- }
-
- bool is_valid = new_model->has_logistic_regression_model();
- if (!is_valid) {
- ReportModelStatus(MODEL_STATUS_VALIDATION_FAILED);
- return;
- }
-
- ReportModelStatus(MODEL_STATUS_OK);
- model_ = std::move(new_model);
- model_fetcher_.reset();
+// Returns the version of the currently installed model, or 0 when no model
+// has been set. The shared model pointer is read while holding |lock_|.
+int TranslateRanker::GetModelVersion() const {
+  base::AutoLock auto_lock(lock_);
+
+  if (shared_model_ptr_ == nullptr)
+    return 0;
+
+  return shared_model_ptr_->data.version();
}
void TranslateRanker::FlushTranslateEvents(

Powered by Google App Engine
This is Rietveld 408576698