OLD | NEW |
| (Empty) |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "components/translate/core/browser/translate_ranker.h" | |
6 | |
7 #include <cmath> | |
8 | |
9 #include "base/bind.h" | |
10 #include "base/bind_helpers.h" | |
11 #include "base/command_line.h" | |
12 #include "base/metrics/histogram_macros.h" | |
13 #include "base/profiler/scoped_tracker.h" | |
14 #include "base/strings/string_util.h" | |
15 #include "components/metrics/proto/translate_event.pb.h" | |
16 #include "components/translate/core/browser/proto/translate_ranker_model.pb.h" | |
17 #include "components/translate/core/browser/translate_download_manager.h" | |
18 #include "components/translate/core/browser/translate_prefs.h" | |
19 #include "components/translate/core/browser/translate_url_fetcher.h" | |
20 #include "components/translate/core/common/translate_switches.h" | |
21 | |
22 namespace translate { | |
23 | |
24 namespace { | |
25 | |
26 typedef google::protobuf::Map<std::string, float> WeightMap; | |
27 | |
// Scores at or above this value cause ShouldOfferTranslation() to answer true.
constexpr double kTranslationOfferThreshold = 0.5;

// Parameters for model fetching.

// Default location of the serialized ranker model; GetTranslateRankerURL()
// returns it unless the command line supplies a different URL.
constexpr char kTranslateRankerModelURL[] =
    "https://chromium-i18n.appspot.com/ssl-translate-ranker-model";

// Retry limit handed to the model fetcher for 5xx responses.
constexpr int kMaxRetryOn5xx = 3;

// Number of minutes to hold off before the next download attempt once a
// request could not be started.
constexpr int kDownloadRefractoryPeriodMin = 15;

// Fallback key looked up in a weight map when the exact key is missing.
constexpr char kUnknown[] = "UNKNOWN";
36 | |
// Outcome of an attempt to obtain the translate ranker model, as reported to
// the "Translate.Ranker.Model.Status" histogram by ReportModelStatus().
//
// The numeric values must be kept in sync with the TranslateRankerModelStatus
// enum in histograms.xml, so existing entries must not be renumbered.
enum ModelStatus {
  // The model was downloaded, parsed and validated.
  MODEL_STATUS_OK = 0,
  // The fetcher declined to start a request.
  MODEL_STATUS_DOWNLOAD_THROTTLED = 1,
  // The fetch finished unsuccessfully.
  MODEL_STATUS_DOWNLOAD_FAILED = 2,
  // The downloaded bytes could not be parsed as a model proto.
  MODEL_STATUS_PARSE_FAILED = 3,
  // The parsed proto lacks a logistic regression model.
  MODEL_STATUS_VALIDATION_FAILED = 4,

  // Insert new values above this line.
  MODEL_STATUS_MAX
};
49 | |
// Logistic function: maps any real |x| into the range [0, 1], with
// Sigmoid(0) == 0.5.
//
// std::exp is spelled with its namespace because <cmath> is only guaranteed
// to declare the function inside namespace std.
double Sigmoid(double x) {
  return 1.0 / (1.0 + std::exp(-x));
}
53 | |
54 double ScoreComponent(const WeightMap& weights, const std::string& key) { | |
55 WeightMap::const_iterator i = weights.find(base::ToLowerASCII(key)); | |
56 if (i == weights.end()) | |
57 i = weights.find(kUnknown); | |
58 return i == weights.end() ? 0.0 : i->second; | |
59 } | |
60 | |
61 GURL GetTranslateRankerURL() { | |
62 base::CommandLine* command_line = base::CommandLine::ForCurrentProcess(); | |
63 return GURL(command_line->HasSwitch(switches::kTranslateRankerModelURL) | |
64 ? command_line->GetSwitchValueASCII( | |
65 switches::kTranslateRankerModelURL) | |
66 : kTranslateRankerModelURL); | |
67 } | |
68 | |
69 void ReportModelStatus(ModelStatus model_status) { | |
70 UMA_HISTOGRAM_ENUMERATION("Translate.Ranker.Model.Status", model_status, | |
71 MODEL_STATUS_MAX); | |
72 } | |
73 | |
74 } // namespace | |
75 | |
76 const base::Feature kTranslateRankerQuery{"TranslateRankerQuery", | |
77 base::FEATURE_DISABLED_BY_DEFAULT}; | |
78 | |
79 const base::Feature kTranslateRankerEnforcement{ | |
80 "TranslateRankerEnforcement", base::FEATURE_DISABLED_BY_DEFAULT}; | |
81 | |
82 const base::Feature kTranslateRankerLogging{"TranslateRankerLogging", | |
83 base::FEATURE_DISABLED_BY_DEFAULT}; | |
84 | |
85 TranslateRanker::~TranslateRanker() {} | |
86 | |
87 // static | |
88 bool TranslateRanker::IsEnabled() { | |
89 return IsQueryEnabled() || IsEnforcementEnabled(); | |
90 } | |
91 | |
92 // static | |
93 bool TranslateRanker::IsLoggingEnabled() { | |
94 return base::FeatureList::IsEnabled(kTranslateRankerLogging); | |
95 } | |
96 | |
97 // static | |
98 bool TranslateRanker::IsQueryEnabled() { | |
99 return base::FeatureList::IsEnabled(kTranslateRankerQuery); | |
100 } | |
101 | |
102 // static | |
103 bool TranslateRanker::IsEnforcementEnabled() { | |
104 return base::FeatureList::IsEnabled(kTranslateRankerEnforcement); | |
105 } | |
106 | |
107 // static | |
108 TranslateRanker* TranslateRanker::GetInstance() { | |
109 return base::Singleton<TranslateRanker>::get(); | |
110 } | |
111 | |
112 std::unique_ptr<TranslateRanker> TranslateRanker::CreateForTesting( | |
113 const std::string& model_data) { | |
114 std::unique_ptr<TranslateRanker> ranker(new TranslateRanker()); | |
115 CHECK(ranker != nullptr); | |
116 ranker->ParseModel(0, true, model_data); | |
117 CHECK(ranker->model_ != nullptr); | |
118 return ranker; | |
119 } | |
120 | |
121 bool TranslateRanker::ShouldOfferTranslation( | |
122 const TranslatePrefs& translate_prefs, | |
123 const std::string& src_lang, | |
124 const std::string& dst_lang) { | |
125 // The ranker is a gate in the "show a translation prompt" flow. To retain | |
126 // the pre-existing functionality, it defaults to returning true in the | |
127 // absence of a model or if enforcement is disabled. As this is ranker is | |
128 // subsumed into a more general assist ranker, this default will go away | |
129 // (or become False). | |
130 const bool kDefaultResponse = true; | |
131 | |
132 // If we don't have a model, request one and return the default. | |
133 if (model_ == nullptr) { | |
134 FetchModelData(); | |
135 return kDefaultResponse; | |
136 } | |
137 | |
138 DCHECK(model_->has_logistic_regression_model()); | |
139 SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ShouldOfferTranslation"); | |
140 | |
141 // TODO(rogerm): Remove ScopedTracker below once crbug.com/646711 is closed. | |
142 tracked_objects::ScopedTracker tracking_profile( | |
143 FROM_HERE_WITH_EXPLICIT_FUNCTION( | |
144 "646711 translate::TranslateRanker::ShouldOfferTranslation")); | |
145 | |
146 const std::string& app_locale = | |
147 TranslateDownloadManager::GetInstance()->application_locale(); | |
148 const std::string& country = translate_prefs.GetCountry(); | |
149 int accept_count = translate_prefs.GetTranslationAcceptedCount(src_lang); | |
150 int decline_count = translate_prefs.GetTranslationDeniedCount(src_lang); | |
151 int ignore_count = translate_prefs.GetTranslationIgnoredCount(src_lang); | |
152 DVLOG(3) << "TranslateRanker: features=[" | |
153 << "src_lang='" << src_lang << "', dst_lang='" << dst_lang | |
154 << "', country='" << country << "', locale='" << app_locale | |
155 << ", accept_count=" << accept_count | |
156 << ", decline_count=" << decline_count | |
157 << ", ignore_count=" << ignore_count << "]"; | |
158 | |
159 double score = CalculateScore(accept_count, decline_count, ignore_count, | |
160 src_lang, dst_lang, app_locale, country); | |
161 | |
162 DVLOG(2) << "TranslateRanker Score: " << score; | |
163 | |
164 bool result = (score >= kTranslationOfferThreshold); | |
165 | |
166 UMA_HISTOGRAM_BOOLEAN("Translate.Ranker.QueryResult", result); | |
167 | |
168 return result; | |
169 } | |
170 | |
171 TranslateRanker::TranslateRanker() {} | |
172 | |
173 double TranslateRanker::CalculateScore(int accept_count, | |
174 int decline_count, | |
175 int ignore_count, | |
176 const std::string& src_lang, | |
177 const std::string& dst_lang, | |
178 const std::string& locale, | |
179 const std::string& country) { | |
180 SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.CalculateScore"); | |
181 DCHECK(model_ != nullptr); | |
182 DCHECK(model_->has_logistic_regression_model()); | |
183 | |
184 int total_count = accept_count + decline_count + ignore_count; | |
185 double accept_ratio = | |
186 (total_count == 0) ? 0.0 : (double(accept_count) / total_count); | |
187 double decline_ratio = | |
188 (total_count == 0) ? 0.0 : (double(decline_count) / total_count); | |
189 double ignore_ratio = | |
190 (total_count == 0) ? 0.0 : (double(ignore_count) / total_count); | |
191 DVLOG(3) << "TranslateRanker: ratios=[" | |
192 << ", accept_ratio=" << accept_ratio | |
193 << ", decline_ratio=" << decline_ratio | |
194 << ", ignore_ratio=" << ignore_ratio << "]"; | |
195 | |
196 const chrome_intelligence::TranslateRankerModel::LogisticRegressionModel& | |
197 logit = model_->logistic_regression_model(); | |
198 double dot_product = | |
199 (accept_ratio * logit.accept_ratio_weight()) + | |
200 (decline_ratio * logit.decline_ratio_weight()) + | |
201 (ignore_ratio * logit.ignore_ratio_weight()) + | |
202 (accept_count * logit.accept_count_weight()) + | |
203 (decline_count * logit.decline_count_weight()) + | |
204 (ignore_count * logit.ignore_count_weight()) + | |
205 ScoreComponent(logit.source_language_weight(), src_lang) + | |
206 ScoreComponent(logit.dest_language_weight(), dst_lang) + | |
207 ScoreComponent(logit.country_weight(), country) + | |
208 ScoreComponent(logit.locale_weight(), locale); | |
209 return Sigmoid(dot_product + logit.bias()); | |
210 } | |
211 | |
212 int TranslateRanker::GetModelVersion() const { | |
213 return (model_ == nullptr) ? 0 : model_->version(); | |
214 } | |
215 | |
216 void TranslateRanker::FetchModelData() { | |
217 // Exit if the model has already been successfully loaded. | |
218 if (model_ != nullptr) { | |
219 return; | |
220 } | |
221 | |
222 // Exit if the download has been throttled. | |
223 if (base::Time::NowFromSystemTime() < next_earliest_download_time_) { | |
224 return; | |
225 } | |
226 | |
227 // Create the model fetcher if it does not exist. | |
228 if (model_fetcher_ == nullptr) { | |
229 model_fetcher_.reset(new TranslateURLFetcher(kFetcherId)); | |
230 model_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx); | |
231 } | |
232 | |
233 // If a request is already in flight, do not issue a new one. | |
234 if (model_fetcher_->state() == TranslateURLFetcher::REQUESTING) { | |
235 DVLOG(2) << "TranslateRanker: Download complete or in progress."; | |
236 return; | |
237 } | |
238 | |
239 DVLOG(2) << "TranslateRanker: Downloading model..."; | |
240 | |
241 download_start_time_ = base::Time::Now(); | |
242 bool result = model_fetcher_->Request( | |
243 GetTranslateRankerURL(), | |
244 base::Bind(&TranslateRanker::ParseModel, base::Unretained(this))); | |
245 | |
246 if (!result) { | |
247 ReportModelStatus(MODEL_STATUS_DOWNLOAD_THROTTLED); | |
248 next_earliest_download_time_ = | |
249 base::Time::NowFromSystemTime() + | |
250 base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin); | |
251 } | |
252 } | |
253 | |
254 void TranslateRanker::ParseModel(int /* id */, | |
255 bool success, | |
256 const std::string& data) { | |
257 UMA_HISTOGRAM_MEDIUM_TIMES("Translate.Ranker.Timer.DownloadModel", | |
258 base::Time::Now() - download_start_time_); | |
259 | |
260 SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ParseModel"); | |
261 | |
262 // We should not be here if the model has already been downloaded and parsed. | |
263 DCHECK(model_ == nullptr); | |
264 | |
265 // On failure, we just abort. The TranslateRanker will retry on a subsequent | |
266 // translation opportunity. The TranslateURLFetcher enforces a limit for | |
267 // retried requests. | |
268 if (!success) { | |
269 ReportModelStatus(MODEL_STATUS_DOWNLOAD_FAILED); | |
270 return; | |
271 } | |
272 | |
273 // Create a new model instance, parse and validate the data, and move it over | |
274 // to be used by the ranker. | |
275 std::unique_ptr<chrome_intelligence::TranslateRankerModel> new_model( | |
276 new chrome_intelligence::TranslateRankerModel()); | |
277 | |
278 bool is_parseable = new_model->ParseFromString(data); | |
279 if (!is_parseable) { | |
280 ReportModelStatus(MODEL_STATUS_PARSE_FAILED); | |
281 return; | |
282 } | |
283 | |
284 bool is_valid = new_model->has_logistic_regression_model(); | |
285 if (!is_valid) { | |
286 ReportModelStatus(MODEL_STATUS_VALIDATION_FAILED); | |
287 return; | |
288 } | |
289 | |
290 ReportModelStatus(MODEL_STATUS_OK); | |
291 model_ = std::move(new_model); | |
292 model_fetcher_.reset(); | |
293 | |
294 DVLOG(3) << "Successfully loaded model version " << GetModelVersion() << "."; | |
295 } | |
296 | |
297 void TranslateRanker::FlushTranslateEvents( | |
298 std::vector<metrics::TranslateEventProto>* translate_events) { | |
299 if (IsLoggingEnabled()) { | |
300 translate_events->swap(translate_events_cache_); | |
301 translate_events_cache_.clear(); | |
302 } | |
303 } | |
304 | |
305 void TranslateRanker::RecordTranslateEvent( | |
306 const metrics::TranslateEventProto& translate_event) { | |
307 if (IsLoggingEnabled()) | |
308 translate_events_cache_.push_back(translate_event); | |
309 } | |
310 | |
311 } // namespace translate | |
OLD | NEW |