Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(620)

Side by Side Diff: components/translate/core/browser/translate_ranker.cc

Issue 2565873002: [translate] Add translate ranker model loader. (Closed)
Patch Set: comments from sdefresne Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/translate/core/browser/translate_ranker.h"
6
7 #include <cmath>
8
9 #include "base/bind.h"
10 #include "base/bind_helpers.h"
11 #include "base/command_line.h"
12 #include "base/metrics/histogram_macros.h"
13 #include "base/profiler/scoped_tracker.h"
14 #include "base/strings/string_util.h"
15 #include "components/metrics/proto/translate_event.pb.h"
16 #include "components/translate/core/browser/proto/translate_ranker_model.pb.h"
17 #include "components/translate/core/browser/translate_download_manager.h"
18 #include "components/translate/core/browser/translate_prefs.h"
19 #include "components/translate/core/browser/translate_url_fetcher.h"
20 #include "components/translate/core/common/translate_switches.h"
21
22 namespace translate {
23
24 namespace {
25
26 typedef google::protobuf::Map<std::string, float> WeightMap;
27
// Scores greater than or equal to this value result in a translation offer.
constexpr double kTranslationOfferThreshold = 0.5;

// Parameters for model fetching.
// Default location of the model; may be overridden on the command line.
constexpr char kTranslateRankerModelURL[] =
    "https://chromium-i18n.appspot.com/ssl-translate-ranker-model";
// Passed to the model fetcher as its retry limit for 5xx responses.
constexpr int kMaxRetryOn5xx = 3;
// Number of minutes to wait before retrying after a refused download request.
constexpr int kDownloadRefractoryPeriodMin = 15;
// Fallback key looked up in a weight map when the actual key is not present.
constexpr char kUnknown[] = "UNKNOWN";
36
// Enumeration denoting the outcome of an attempt to download the translate
// ranker model. This must be kept in sync with the TranslateRankerModelStatus
// enum in histograms.xml.
//
// Values are recorded in the "Translate.Ranker.Model.Status" histogram, with
// MODEL_STATUS_MAX serving as the boundary value, so existing numeric values
// must not be changed.
enum ModelStatus {
  // The model was downloaded, parsed and validated.
  MODEL_STATUS_OK = 0,
  // The fetcher refused to issue the download request.
  MODEL_STATUS_DOWNLOAD_THROTTLED = 1,
  // The download completed unsuccessfully.
  MODEL_STATUS_DOWNLOAD_FAILED = 2,
  // The downloaded data could not be parsed as a model proto.
  MODEL_STATUS_PARSE_FAILED = 3,
  // The parsed proto does not contain a logistic regression model.
  MODEL_STATUS_VALIDATION_FAILED = 4,
  // Insert new values above this line.
  MODEL_STATUS_MAX
};
49
// Returns the logistic function 1 / (1 + e^-x), which maps any real |x| into
// the range [0, 1]. Sigmoid(0) is exactly 0.5.
double Sigmoid(double x) {
  // <cmath> only guarantees the std-qualified name, so qualify it explicitly.
  return 1.0 / (1.0 + std::exp(-x));
}
53
54 double ScoreComponent(const WeightMap& weights, const std::string& key) {
55 WeightMap::const_iterator i = weights.find(base::ToLowerASCII(key));
56 if (i == weights.end())
57 i = weights.find(kUnknown);
58 return i == weights.end() ? 0.0 : i->second;
59 }
60
61 GURL GetTranslateRankerURL() {
62 base::CommandLine* command_line = base::CommandLine::ForCurrentProcess();
63 return GURL(command_line->HasSwitch(switches::kTranslateRankerModelURL)
64 ? command_line->GetSwitchValueASCII(
65 switches::kTranslateRankerModelURL)
66 : kTranslateRankerModelURL);
67 }
68
// Records |model_status| in the "Translate.Ranker.Model.Status" enumeration
// histogram, using MODEL_STATUS_MAX as the boundary value.
void ReportModelStatus(ModelStatus model_status) {
  UMA_HISTOGRAM_ENUMERATION("Translate.Ranker.Model.Status", model_status,
                            MODEL_STATUS_MAX);
}
73
74 } // namespace
75
// Feature checked by TranslateRanker::IsQueryEnabled(). Disabled by default.
const base::Feature kTranslateRankerQuery{"TranslateRankerQuery",
                                          base::FEATURE_DISABLED_BY_DEFAULT};

// Feature checked by TranslateRanker::IsEnforcementEnabled(). Disabled by
// default.
const base::Feature kTranslateRankerEnforcement{
    "TranslateRankerEnforcement", base::FEATURE_DISABLED_BY_DEFAULT};

// Feature checked by TranslateRanker::IsLoggingEnabled(). Disabled by default.
const base::Feature kTranslateRankerLogging{"TranslateRankerLogging",
                                            base::FEATURE_DISABLED_BY_DEFAULT};
84
85 TranslateRanker::~TranslateRanker() {}
86
87 // static
88 bool TranslateRanker::IsEnabled() {
89 return IsQueryEnabled() || IsEnforcementEnabled();
90 }
91
92 // static
93 bool TranslateRanker::IsLoggingEnabled() {
94 return base::FeatureList::IsEnabled(kTranslateRankerLogging);
95 }
96
97 // static
98 bool TranslateRanker::IsQueryEnabled() {
99 return base::FeatureList::IsEnabled(kTranslateRankerQuery);
100 }
101
102 // static
103 bool TranslateRanker::IsEnforcementEnabled() {
104 return base::FeatureList::IsEnabled(kTranslateRankerEnforcement);
105 }
106
107 // static
108 TranslateRanker* TranslateRanker::GetInstance() {
109 return base::Singleton<TranslateRanker>::get();
110 }
111
112 std::unique_ptr<TranslateRanker> TranslateRanker::CreateForTesting(
113 const std::string& model_data) {
114 std::unique_ptr<TranslateRanker> ranker(new TranslateRanker());
115 CHECK(ranker != nullptr);
116 ranker->ParseModel(0, true, model_data);
117 CHECK(ranker->model_ != nullptr);
118 return ranker;
119 }
120
121 bool TranslateRanker::ShouldOfferTranslation(
122 const TranslatePrefs& translate_prefs,
123 const std::string& src_lang,
124 const std::string& dst_lang) {
125 // The ranker is a gate in the "show a translation prompt" flow. To retain
126 // the pre-existing functionality, it defaults to returning true in the
127 // absence of a model or if enforcement is disabled. As this is ranker is
128 // subsumed into a more general assist ranker, this default will go away
129 // (or become False).
130 const bool kDefaultResponse = true;
131
132 // If we don't have a model, request one and return the default.
133 if (model_ == nullptr) {
134 FetchModelData();
135 return kDefaultResponse;
136 }
137
138 DCHECK(model_->has_logistic_regression_model());
139 SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ShouldOfferTranslation");
140
141 // TODO(rogerm): Remove ScopedTracker below once crbug.com/646711 is closed.
142 tracked_objects::ScopedTracker tracking_profile(
143 FROM_HERE_WITH_EXPLICIT_FUNCTION(
144 "646711 translate::TranslateRanker::ShouldOfferTranslation"));
145
146 const std::string& app_locale =
147 TranslateDownloadManager::GetInstance()->application_locale();
148 const std::string& country = translate_prefs.GetCountry();
149 int accept_count = translate_prefs.GetTranslationAcceptedCount(src_lang);
150 int decline_count = translate_prefs.GetTranslationDeniedCount(src_lang);
151 int ignore_count = translate_prefs.GetTranslationIgnoredCount(src_lang);
152 DVLOG(3) << "TranslateRanker: features=["
153 << "src_lang='" << src_lang << "', dst_lang='" << dst_lang
154 << "', country='" << country << "', locale='" << app_locale
155 << ", accept_count=" << accept_count
156 << ", decline_count=" << decline_count
157 << ", ignore_count=" << ignore_count << "]";
158
159 double score = CalculateScore(accept_count, decline_count, ignore_count,
160 src_lang, dst_lang, app_locale, country);
161
162 DVLOG(2) << "TranslateRanker Score: " << score;
163
164 bool result = (score >= kTranslationOfferThreshold);
165
166 UMA_HISTOGRAM_BOOLEAN("Translate.Ranker.QueryResult", result);
167
168 return result;
169 }
170
171 TranslateRanker::TranslateRanker() {}
172
173 double TranslateRanker::CalculateScore(int accept_count,
174 int decline_count,
175 int ignore_count,
176 const std::string& src_lang,
177 const std::string& dst_lang,
178 const std::string& locale,
179 const std::string& country) {
180 SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.CalculateScore");
181 DCHECK(model_ != nullptr);
182 DCHECK(model_->has_logistic_regression_model());
183
184 int total_count = accept_count + decline_count + ignore_count;
185 double accept_ratio =
186 (total_count == 0) ? 0.0 : (double(accept_count) / total_count);
187 double decline_ratio =
188 (total_count == 0) ? 0.0 : (double(decline_count) / total_count);
189 double ignore_ratio =
190 (total_count == 0) ? 0.0 : (double(ignore_count) / total_count);
191 DVLOG(3) << "TranslateRanker: ratios=["
192 << ", accept_ratio=" << accept_ratio
193 << ", decline_ratio=" << decline_ratio
194 << ", ignore_ratio=" << ignore_ratio << "]";
195
196 const chrome_intelligence::TranslateRankerModel::LogisticRegressionModel&
197 logit = model_->logistic_regression_model();
198 double dot_product =
199 (accept_ratio * logit.accept_ratio_weight()) +
200 (decline_ratio * logit.decline_ratio_weight()) +
201 (ignore_ratio * logit.ignore_ratio_weight()) +
202 (accept_count * logit.accept_count_weight()) +
203 (decline_count * logit.decline_count_weight()) +
204 (ignore_count * logit.ignore_count_weight()) +
205 ScoreComponent(logit.source_language_weight(), src_lang) +
206 ScoreComponent(logit.dest_language_weight(), dst_lang) +
207 ScoreComponent(logit.country_weight(), country) +
208 ScoreComponent(logit.locale_weight(), locale);
209 return Sigmoid(dot_product + logit.bias());
210 }
211
212 int TranslateRanker::GetModelVersion() const {
213 return (model_ == nullptr) ? 0 : model_->version();
214 }
215
216 void TranslateRanker::FetchModelData() {
217 // Exit if the model has already been successfully loaded.
218 if (model_ != nullptr) {
219 return;
220 }
221
222 // Exit if the download has been throttled.
223 if (base::Time::NowFromSystemTime() < next_earliest_download_time_) {
224 return;
225 }
226
227 // Create the model fetcher if it does not exist.
228 if (model_fetcher_ == nullptr) {
229 model_fetcher_.reset(new TranslateURLFetcher(kFetcherId));
230 model_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx);
231 }
232
233 // If a request is already in flight, do not issue a new one.
234 if (model_fetcher_->state() == TranslateURLFetcher::REQUESTING) {
235 DVLOG(2) << "TranslateRanker: Download complete or in progress.";
236 return;
237 }
238
239 DVLOG(2) << "TranslateRanker: Downloading model...";
240
241 download_start_time_ = base::Time::Now();
242 bool result = model_fetcher_->Request(
243 GetTranslateRankerURL(),
244 base::Bind(&TranslateRanker::ParseModel, base::Unretained(this)));
245
246 if (!result) {
247 ReportModelStatus(MODEL_STATUS_DOWNLOAD_THROTTLED);
248 next_earliest_download_time_ =
249 base::Time::NowFromSystemTime() +
250 base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin);
251 }
252 }
253
254 void TranslateRanker::ParseModel(int /* id */,
255 bool success,
256 const std::string& data) {
257 UMA_HISTOGRAM_MEDIUM_TIMES("Translate.Ranker.Timer.DownloadModel",
258 base::Time::Now() - download_start_time_);
259
260 SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ParseModel");
261
262 // We should not be here if the model has already been downloaded and parsed.
263 DCHECK(model_ == nullptr);
264
265 // On failure, we just abort. The TranslateRanker will retry on a subsequent
266 // translation opportunity. The TranslateURLFetcher enforces a limit for
267 // retried requests.
268 if (!success) {
269 ReportModelStatus(MODEL_STATUS_DOWNLOAD_FAILED);
270 return;
271 }
272
273 // Create a new model instance, parse and validate the data, and move it over
274 // to be used by the ranker.
275 std::unique_ptr<chrome_intelligence::TranslateRankerModel> new_model(
276 new chrome_intelligence::TranslateRankerModel());
277
278 bool is_parseable = new_model->ParseFromString(data);
279 if (!is_parseable) {
280 ReportModelStatus(MODEL_STATUS_PARSE_FAILED);
281 return;
282 }
283
284 bool is_valid = new_model->has_logistic_regression_model();
285 if (!is_valid) {
286 ReportModelStatus(MODEL_STATUS_VALIDATION_FAILED);
287 return;
288 }
289
290 ReportModelStatus(MODEL_STATUS_OK);
291 model_ = std::move(new_model);
292 model_fetcher_.reset();
293
294 DVLOG(3) << "Successfully loaded model version " << GetModelVersion() << ".";
295 }
296
297 void TranslateRanker::FlushTranslateEvents(
298 std::vector<metrics::TranslateEventProto>* translate_events) {
299 if (IsLoggingEnabled()) {
300 translate_events->swap(translate_events_cache_);
301 translate_events_cache_.clear();
302 }
303 }
304
305 void TranslateRanker::RecordTranslateEvent(
306 const metrics::TranslateEventProto& translate_event) {
307 if (IsLoggingEnabled())
308 translate_events_cache_.push_back(translate_event);
309 }
310
311 } // namespace translate
OLDNEW
« no previous file with comments | « components/translate/core/browser/translate_ranker.h ('k') | components/translate/core/browser/translate_ranker_impl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698