Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(91)

Side by Side Diff: components/translate/core/browser/translate_ranker.cc

Issue 2565873002: [translate] Add translate ranker model loader. (Closed)
Patch Set: comments from hamelphi Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/translate/core/browser/translate_ranker.h"
6
7 #include <cmath>
8
9 #include "base/bind.h"
10 #include "base/bind_helpers.h"
11 #include "base/command_line.h"
12 #include "base/metrics/histogram_macros.h"
13 #include "base/profiler/scoped_tracker.h"
14 #include "base/strings/string_util.h"
15 #include "components/metrics/proto/translate_event.pb.h"
16 #include "components/translate/core/browser/proto/translate_ranker_model.pb.h"
17 #include "components/translate/core/browser/translate_download_manager.h"
18 #include "components/translate/core/browser/translate_prefs.h"
19 #include "components/translate/core/browser/translate_url_fetcher.h"
20 #include "components/translate/core/common/translate_switches.h"
21
22 namespace translate {
23
24 namespace {
25
26 typedef google::protobuf::Map<std::string, float> WeightMap;
27
28 const double kTranslationOfferThreshold = 0.5;
29
30 // Parameters for model fetching.
31 const char kTranslateRankerModelURL[] =
32 "https://chromium-i18n.appspot.com/ssl-translate-ranker-model";
33 const int kMaxRetryOn5xx = 3;
34 const int kDownloadRefractoryPeriodMin = 15;
35 const char kUnknown[] = "UNKNOWN";
36
// Outcome of an attempt to download and load the translate ranker model.
// This must be kept in sync with the TranslateRankerModelStatus enum in
// histograms.xml.
enum ModelStatus {
  // The model was downloaded, parsed and validated.
  MODEL_STATUS_OK = 0,
  // The fetcher refused to start the request.
  MODEL_STATUS_DOWNLOAD_THROTTLED = 1,
  // The fetcher reported an unsuccessful download.
  MODEL_STATUS_DOWNLOAD_FAILED = 2,
  // The downloaded bytes could not be parsed as a model proto.
  MODEL_STATUS_PARSE_FAILED = 3,
  // The parsed proto has no logistic regression model.
  MODEL_STATUS_VALIDATION_FAILED = 4,
  // Insert new values above this line.
  MODEL_STATUS_MAX
};
49
// Logistic function: maps any real |x| into the open interval (0, 1), with
// Sigmoid(0) == 0.5.
double Sigmoid(double x) {
  // Use the std:: qualified overload; <cmath> is only guaranteed to declare
  // the math functions inside namespace std.
  return 1.0 / (1.0 + std::exp(-x));
}
53
54 double ScoreComponent(const WeightMap& weights, const std::string& key) {
55 WeightMap::const_iterator i = weights.find(base::ToLowerASCII(key));
56 if (i == weights.end())
57 i = weights.find(kUnknown);
58 return i == weights.end() ? 0.0 : i->second;
59 }
60
61 GURL GetTranslateRankerURL() {
62 base::CommandLine* command_line = base::CommandLine::ForCurrentProcess();
63 return GURL(command_line->HasSwitch(switches::kTranslateRankerModelURL)
64 ? command_line->GetSwitchValueASCII(
65 switches::kTranslateRankerModelURL)
66 : kTranslateRankerModelURL);
67 }
68
69 void ReportModelStatus(ModelStatus model_status) {
70 UMA_HISTOGRAM_ENUMERATION("Translate.Ranker.Model.Status", model_status,
71 MODEL_STATUS_MAX);
72 }
73
74 } // namespace
75
76 const base::Feature kTranslateRankerQuery{"TranslateRankerQuery",
77 base::FEATURE_DISABLED_BY_DEFAULT};
78
79 const base::Feature kTranslateRankerEnforcement{
80 "TranslateRankerEnforcement", base::FEATURE_DISABLED_BY_DEFAULT};
81
82 const base::Feature kTranslateRankerLogging{"TranslateRankerLogging",
83 base::FEATURE_DISABLED_BY_DEFAULT};
84
85 TranslateRanker::~TranslateRanker() {}
86
87 // static
88 bool TranslateRanker::IsEnabled() {
89 return IsQueryEnabled() || IsEnforcementEnabled();
90 }
91
92 // static
93 bool TranslateRanker::IsLoggingEnabled() {
94 return base::FeatureList::IsEnabled(kTranslateRankerLogging);
95 }
96
97 // static
98 bool TranslateRanker::IsQueryEnabled() {
99 return base::FeatureList::IsEnabled(kTranslateRankerQuery);
100 }
101
102 // static
103 bool TranslateRanker::IsEnforcementEnabled() {
104 return base::FeatureList::IsEnabled(kTranslateRankerEnforcement);
105 }
106
107 // static
108 TranslateRanker* TranslateRanker::GetInstance() {
109 return base::Singleton<TranslateRanker>::get();
110 }
111
112 std::unique_ptr<TranslateRanker> TranslateRanker::CreateForTesting(
113 const std::string& model_data) {
114 std::unique_ptr<TranslateRanker> ranker(new TranslateRanker());
115 CHECK(ranker != nullptr);
116 ranker->ParseModel(0, true, model_data);
117 CHECK(ranker->model_ != nullptr);
118 return ranker;
119 }
120
121 bool TranslateRanker::ShouldOfferTranslation(
122 const TranslatePrefs& translate_prefs,
123 const std::string& src_lang,
124 const std::string& dst_lang) {
125 // The ranker is a gate in the "show a translation prompt" flow. To retain
126 // the pre-existing functionality, it defaults to returning true in the
127 // absence of a model or if enforcement is disabled. As this is ranker is
128 // subsumed into a more general assist ranker, this default will go away
129 // (or become False).
130 const bool kDefaultResponse = true;
131
132 // If we don't have a model, request one and return the default.
133 if (model_ == nullptr) {
134 FetchModelData();
135 return kDefaultResponse;
136 }
137
138 DCHECK(model_->has_logistic_regression_model());
139
140 SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ShouldOfferTranslation");
141
142 // TODO(rogerm): Remove ScopedTracker below once crbug.com/646711 is closed.
143 tracked_objects::ScopedTracker tracking_profile(
144 FROM_HERE_WITH_EXPLICIT_FUNCTION(
145 "646711 translate::TranslateRanker::ShouldOfferTranslation"));
146
147 const std::string& app_locale =
148 TranslateDownloadManager::GetInstance()->application_locale();
149 const std::string& country = translate_prefs.GetCountry();
150 double accept_count = translate_prefs.GetTranslationAcceptedCount(src_lang);
151 double denied_count = translate_prefs.GetTranslationDeniedCount(src_lang);
152 double ignored_count =
153 model_->logistic_regression_model().has_ignore_ratio_weight()
154 ? translate_prefs.GetTranslationIgnoredCount(src_lang)
155 : 0.0;
156 double total_count = accept_count + denied_count + ignored_count;
157 double accept_ratio =
158 (total_count == 0.0) ? 0.0 : (accept_count / total_count);
159 double decline_ratio =
160 (total_count == 0.0) ? 0.0 : (denied_count / total_count);
161 double ignore_ratio =
162 (total_count == 0.0) ? 0.0 : (ignored_count / total_count);
163 DVLOG(3) << "TranslateRanker: features=["
164 << "src_lang='" << src_lang << "', dst_lang='" << dst_lang
165 << "', country='" << country << "', locale='" << app_locale
166 << ", accept_count=" << accept_count
167 << ", denied_count=" << denied_count
168 << ", ignored_count=" << ignored_count
169 << ", total_count=" << total_count
170 << ", accept_ratio=" << accept_ratio
171 << ", decline_ratio=" << decline_ratio
172 << ", ignore_ratio=" << ignore_ratio << "]";
173
174 double score = CalculateScore(accept_ratio, decline_ratio, ignore_ratio,
175 src_lang, dst_lang, app_locale, country);
176
177 DVLOG(2) << "TranslateRanker Score: " << score;
178
179 bool result = (score >= kTranslationOfferThreshold);
180
181 UMA_HISTOGRAM_BOOLEAN("Translate.Ranker.QueryResult", result);
182
183 return result;
184 }
185
186 TranslateRanker::TranslateRanker() {}
187
188 double TranslateRanker::CalculateScore(double accept_ratio,
189 double decline_ratio,
190 double ignore_ratio,
191 const std::string& src_lang,
192 const std::string& dst_lang,
193 const std::string& locale,
194 const std::string& country) {
195 SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.CalculateScore");
196 DCHECK(model_ != nullptr);
197 DCHECK(model_->has_logistic_regression_model());
198 const chrome_intelligence::TranslateRankerModel::LogisticRegressionModel&
199 logit = model_->logistic_regression_model();
200 double dot_product =
201 (accept_ratio * logit.accept_ratio_weight()) +
202 (decline_ratio * logit.decline_ratio_weight()) +
203 (ignore_ratio * logit.ignore_ratio_weight()) +
204 ScoreComponent(logit.source_language_weight(), src_lang) +
205 ScoreComponent(logit.dest_language_weight(), dst_lang) +
206 ScoreComponent(logit.country_weight(), country) +
207 ScoreComponent(logit.locale_weight(), locale);
208 return Sigmoid(dot_product + logit.bias());
209 }
210
211 int TranslateRanker::GetModelVersion() const {
212 return (model_ == nullptr) ? 0 : model_->version();
213 }
214
215 void TranslateRanker::FetchModelData() {
216 // Exit if the model has already been successfully loaded.
217 if (model_ != nullptr) {
218 return;
219 }
220
221 // Exit if the download has been throttled.
222 if (base::Time::NowFromSystemTime() < next_earliest_download_time_) {
223 return;
224 }
225
226 // Create the model fetcher if it does not exist.
227 if (model_fetcher_ == nullptr) {
228 model_fetcher_.reset(new TranslateURLFetcher(kFetcherId));
229 model_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx);
230 }
231
232 // If a request is already in flight, do not issue a new one.
233 if (model_fetcher_->state() == TranslateURLFetcher::REQUESTING) {
234 DVLOG(2) << "TranslateRanker: Download complete or in progress.";
235 return;
236 }
237
238 DVLOG(2) << "TranslateRanker: Downloading model...";
239
240 download_start_time_ = base::Time::Now();
241 bool result = model_fetcher_->Request(
242 GetTranslateRankerURL(),
243 base::Bind(&TranslateRanker::ParseModel, base::Unretained(this)));
244
245 if (!result) {
246 ReportModelStatus(MODEL_STATUS_DOWNLOAD_THROTTLED);
247 next_earliest_download_time_ =
248 base::Time::NowFromSystemTime() +
249 base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin);
250 }
251 }
252
253 void TranslateRanker::ParseModel(int /* id */,
254 bool success,
255 const std::string& data) {
256 UMA_HISTOGRAM_MEDIUM_TIMES("Translate.Ranker.Timer.DownloadModel",
257 base::Time::Now() - download_start_time_);
258
259 SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ParseModel");
260
261 // We should not be here if the model has already been downloaded and parsed.
262 DCHECK(model_ == nullptr);
263
264 // On failure, we just abort. The TranslateRanker will retry on a subsequent
265 // translation opportunity. The TranslateURLFetcher enforces a limit for
266 // retried requests.
267 if (!success) {
268 ReportModelStatus(MODEL_STATUS_DOWNLOAD_FAILED);
269 return;
270 }
271
272 // Create a new model instance, parse and validate the data, and move it over
273 // to be used by the ranker.
274 std::unique_ptr<chrome_intelligence::TranslateRankerModel> new_model(
275 new chrome_intelligence::TranslateRankerModel());
276
277 bool is_parseable = new_model->ParseFromString(data);
278 if (!is_parseable) {
279 ReportModelStatus(MODEL_STATUS_PARSE_FAILED);
280 return;
281 }
282
283 bool is_valid = new_model->has_logistic_regression_model();
284 if (!is_valid) {
285 ReportModelStatus(MODEL_STATUS_VALIDATION_FAILED);
286 return;
287 }
288
289 ReportModelStatus(MODEL_STATUS_OK);
290 model_ = std::move(new_model);
291 model_fetcher_.reset();
292 }
293
294 void TranslateRanker::FlushTranslateEvents(
295 std::vector<metrics::TranslateEventProto>* translate_events) {
296 if (IsLoggingEnabled()) {
297 translate_events->swap(translate_events_cache_);
298 translate_events_cache_.clear();
299 }
300 }
301
302 void TranslateRanker::RecordTranslateEvent(
303 const metrics::TranslateEventProto& translate_event) {
304 if (IsLoggingEnabled())
305 translate_events_cache_.push_back(translate_event);
306 }
307
308 } // namespace translate
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698