OLD | NEW |
| (Empty) |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "components/translate/core/browser/translate_ranker.h" | |
6 | |
7 #include <cmath> | |
8 | |
9 #include "base/bind.h" | |
10 #include "base/bind_helpers.h" | |
11 #include "base/command_line.h" | |
12 #include "base/metrics/histogram_macros.h" | |
13 #include "base/profiler/scoped_tracker.h" | |
14 #include "base/strings/string_util.h" | |
15 #include "components/metrics/proto/translate_event.pb.h" | |
16 #include "components/translate/core/browser/proto/translate_ranker_model.pb.h" | |
17 #include "components/translate/core/browser/translate_download_manager.h" | |
18 #include "components/translate/core/browser/translate_prefs.h" | |
19 #include "components/translate/core/browser/translate_url_fetcher.h" | |
20 #include "components/translate/core/common/translate_switches.h" | |
21 | |
22 namespace translate { | |
23 | |
24 namespace { | |
25 | |
26 typedef google::protobuf::Map<std::string, float> WeightMap; | |
27 | |
// Scores greater than or equal to this value result in a translation offer.
constexpr double kTranslationOfferThreshold = 0.5;

// Parameters for model fetching.

// Default location of the ranker model; see GetTranslateRankerURL() for the
// command-line override.
constexpr char kTranslateRankerModelURL[] =
    "https://chromium-i18n.appspot.com/ssl-translate-ranker-model";

// Retry limit handed to the model fetcher for 5xx responses.
constexpr int kMaxRetryOn5xx = 3;

// Number of minutes to wait before another download attempt once a request
// has been refused.
constexpr int kDownloadRefractoryPeriodMin = 15;

// Fallback key consulted when a feature value has no weight of its own.
constexpr char kUnknown[] = "UNKNOWN";
36 | |
// Outcome of an attempt to download and load the translate ranker model.
// These values are reported to UMA, so this must be kept in sync with the
// TranslateRankerModelStatus enum in histograms.xml.
enum ModelStatus {
  // The model was downloaded, parsed and validated.
  MODEL_STATUS_OK = 0,
  // The fetcher declined to start the request.
  MODEL_STATUS_DOWNLOAD_THROTTLED = 1,
  // The download finished unsuccessfully.
  MODEL_STATUS_DOWNLOAD_FAILED = 2,
  // The downloaded payload could not be parsed as a model proto.
  MODEL_STATUS_PARSE_FAILED = 3,
  // The parsed model lacks a logistic regression model.
  MODEL_STATUS_VALIDATION_FAILED = 4,
  // Insert new values above this line.
  MODEL_STATUS_MAX
};
49 | |
// Returns the logistic function 1 / (1 + e^-x) of |x|, a value in [0, 1].
double Sigmoid(double x) {
  // Use the std-qualified overload: <cmath> is only guaranteed to declare
  // exp() in namespace std, not in the global namespace.
  return 1.0 / (1.0 + std::exp(-x));
}
53 | |
54 double ScoreComponent(const WeightMap& weights, const std::string& key) { | |
55 WeightMap::const_iterator i = weights.find(base::ToLowerASCII(key)); | |
56 if (i == weights.end()) | |
57 i = weights.find(kUnknown); | |
58 return i == weights.end() ? 0.0 : i->second; | |
59 } | |
60 | |
61 GURL GetTranslateRankerURL() { | |
62 base::CommandLine* command_line = base::CommandLine::ForCurrentProcess(); | |
63 return GURL(command_line->HasSwitch(switches::kTranslateRankerModelURL) | |
64 ? command_line->GetSwitchValueASCII( | |
65 switches::kTranslateRankerModelURL) | |
66 : kTranslateRankerModelURL); | |
67 } | |
68 | |
69 void ReportModelStatus(ModelStatus model_status) { | |
70 UMA_HISTOGRAM_ENUMERATION("Translate.Ranker.Model.Status", model_status, | |
71 MODEL_STATUS_MAX); | |
72 } | |
73 | |
74 } // namespace | |
75 | |
76 const base::Feature kTranslateRankerQuery{"TranslateRankerQuery", | |
77 base::FEATURE_DISABLED_BY_DEFAULT}; | |
78 | |
79 const base::Feature kTranslateRankerEnforcement{ | |
80 "TranslateRankerEnforcement", base::FEATURE_DISABLED_BY_DEFAULT}; | |
81 | |
82 const base::Feature kTranslateRankerLogging{"TranslateRankerLogging", | |
83 base::FEATURE_DISABLED_BY_DEFAULT}; | |
84 | |
85 TranslateRanker::~TranslateRanker() {} | |
86 | |
87 // static | |
88 bool TranslateRanker::IsEnabled() { | |
89 return IsQueryEnabled() || IsEnforcementEnabled(); | |
90 } | |
91 | |
92 // static | |
93 bool TranslateRanker::IsLoggingEnabled() { | |
94 return base::FeatureList::IsEnabled(kTranslateRankerLogging); | |
95 } | |
96 | |
97 // static | |
98 bool TranslateRanker::IsQueryEnabled() { | |
99 return base::FeatureList::IsEnabled(kTranslateRankerQuery); | |
100 } | |
101 | |
102 // static | |
103 bool TranslateRanker::IsEnforcementEnabled() { | |
104 return base::FeatureList::IsEnabled(kTranslateRankerEnforcement); | |
105 } | |
106 | |
107 // static | |
108 TranslateRanker* TranslateRanker::GetInstance() { | |
109 return base::Singleton<TranslateRanker>::get(); | |
110 } | |
111 | |
112 std::unique_ptr<TranslateRanker> TranslateRanker::CreateForTesting( | |
113 const std::string& model_data) { | |
114 std::unique_ptr<TranslateRanker> ranker(new TranslateRanker()); | |
115 CHECK(ranker != nullptr); | |
116 ranker->ParseModel(0, true, model_data); | |
117 CHECK(ranker->model_ != nullptr); | |
118 return ranker; | |
119 } | |
120 | |
121 bool TranslateRanker::ShouldOfferTranslation( | |
122 const TranslatePrefs& translate_prefs, | |
123 const std::string& src_lang, | |
124 const std::string& dst_lang) { | |
125 // The ranker is a gate in the "show a translation prompt" flow. To retain | |
126 // the pre-existing functionality, it defaults to returning true in the | |
127 // absence of a model or if enforcement is disabled. As this is ranker is | |
128 // subsumed into a more general assist ranker, this default will go away | |
129 // (or become False). | |
130 const bool kDefaultResponse = true; | |
131 | |
132 // If we don't have a model, request one and return the default. | |
133 if (model_ == nullptr) { | |
134 FetchModelData(); | |
135 return kDefaultResponse; | |
136 } | |
137 | |
138 DCHECK(model_->has_logistic_regression_model()); | |
139 | |
140 SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ShouldOfferTranslation"); | |
141 | |
142 // TODO(rogerm): Remove ScopedTracker below once crbug.com/646711 is closed. | |
143 tracked_objects::ScopedTracker tracking_profile( | |
144 FROM_HERE_WITH_EXPLICIT_FUNCTION( | |
145 "646711 translate::TranslateRanker::ShouldOfferTranslation")); | |
146 | |
147 const std::string& app_locale = | |
148 TranslateDownloadManager::GetInstance()->application_locale(); | |
149 const std::string& country = translate_prefs.GetCountry(); | |
150 double accept_count = translate_prefs.GetTranslationAcceptedCount(src_lang); | |
151 double denied_count = translate_prefs.GetTranslationDeniedCount(src_lang); | |
152 double ignored_count = | |
153 model_->logistic_regression_model().has_ignore_ratio_weight() | |
154 ? translate_prefs.GetTranslationIgnoredCount(src_lang) | |
155 : 0.0; | |
156 double total_count = accept_count + denied_count + ignored_count; | |
157 double accept_ratio = | |
158 (total_count == 0.0) ? 0.0 : (accept_count / total_count); | |
159 double decline_ratio = | |
160 (total_count == 0.0) ? 0.0 : (denied_count / total_count); | |
161 double ignore_ratio = | |
162 (total_count == 0.0) ? 0.0 : (ignored_count / total_count); | |
163 DVLOG(3) << "TranslateRanker: features=[" | |
164 << "src_lang='" << src_lang << "', dst_lang='" << dst_lang | |
165 << "', country='" << country << "', locale='" << app_locale | |
166 << ", accept_count=" << accept_count | |
167 << ", denied_count=" << denied_count | |
168 << ", ignored_count=" << ignored_count | |
169 << ", total_count=" << total_count | |
170 << ", accept_ratio=" << accept_ratio | |
171 << ", decline_ratio=" << decline_ratio | |
172 << ", ignore_ratio=" << ignore_ratio << "]"; | |
173 | |
174 double score = CalculateScore(accept_ratio, decline_ratio, ignore_ratio, | |
175 src_lang, dst_lang, app_locale, country); | |
176 | |
177 DVLOG(2) << "TranslateRanker Score: " << score; | |
178 | |
179 bool result = (score >= kTranslationOfferThreshold); | |
180 | |
181 UMA_HISTOGRAM_BOOLEAN("Translate.Ranker.QueryResult", result); | |
182 | |
183 return result; | |
184 } | |
185 | |
186 TranslateRanker::TranslateRanker() {} | |
187 | |
188 double TranslateRanker::CalculateScore(double accept_ratio, | |
189 double decline_ratio, | |
190 double ignore_ratio, | |
191 const std::string& src_lang, | |
192 const std::string& dst_lang, | |
193 const std::string& locale, | |
194 const std::string& country) { | |
195 SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.CalculateScore"); | |
196 DCHECK(model_ != nullptr); | |
197 DCHECK(model_->has_logistic_regression_model()); | |
198 const chrome_intelligence::TranslateRankerModel::LogisticRegressionModel& | |
199 logit = model_->logistic_regression_model(); | |
200 double dot_product = | |
201 (accept_ratio * logit.accept_ratio_weight()) + | |
202 (decline_ratio * logit.decline_ratio_weight()) + | |
203 (ignore_ratio * logit.ignore_ratio_weight()) + | |
204 ScoreComponent(logit.source_language_weight(), src_lang) + | |
205 ScoreComponent(logit.dest_language_weight(), dst_lang) + | |
206 ScoreComponent(logit.country_weight(), country) + | |
207 ScoreComponent(logit.locale_weight(), locale); | |
208 return Sigmoid(dot_product + logit.bias()); | |
209 } | |
210 | |
211 int TranslateRanker::GetModelVersion() const { | |
212 return (model_ == nullptr) ? 0 : model_->version(); | |
213 } | |
214 | |
215 void TranslateRanker::FetchModelData() { | |
216 // Exit if the model has already been successfully loaded. | |
217 if (model_ != nullptr) { | |
218 return; | |
219 } | |
220 | |
221 // Exit if the download has been throttled. | |
222 if (base::Time::NowFromSystemTime() < next_earliest_download_time_) { | |
223 return; | |
224 } | |
225 | |
226 // Create the model fetcher if it does not exist. | |
227 if (model_fetcher_ == nullptr) { | |
228 model_fetcher_.reset(new TranslateURLFetcher(kFetcherId)); | |
229 model_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx); | |
230 } | |
231 | |
232 // If a request is already in flight, do not issue a new one. | |
233 if (model_fetcher_->state() == TranslateURLFetcher::REQUESTING) { | |
234 DVLOG(2) << "TranslateRanker: Download complete or in progress."; | |
235 return; | |
236 } | |
237 | |
238 DVLOG(2) << "TranslateRanker: Downloading model..."; | |
239 | |
240 download_start_time_ = base::Time::Now(); | |
241 bool result = model_fetcher_->Request( | |
242 GetTranslateRankerURL(), | |
243 base::Bind(&TranslateRanker::ParseModel, base::Unretained(this))); | |
244 | |
245 if (!result) { | |
246 ReportModelStatus(MODEL_STATUS_DOWNLOAD_THROTTLED); | |
247 next_earliest_download_time_ = | |
248 base::Time::NowFromSystemTime() + | |
249 base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin); | |
250 } | |
251 } | |
252 | |
253 void TranslateRanker::ParseModel(int /* id */, | |
254 bool success, | |
255 const std::string& data) { | |
256 UMA_HISTOGRAM_MEDIUM_TIMES("Translate.Ranker.Timer.DownloadModel", | |
257 base::Time::Now() - download_start_time_); | |
258 | |
259 SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ParseModel"); | |
260 | |
261 // We should not be here if the model has already been downloaded and parsed. | |
262 DCHECK(model_ == nullptr); | |
263 | |
264 // On failure, we just abort. The TranslateRanker will retry on a subsequent | |
265 // translation opportunity. The TranslateURLFetcher enforces a limit for | |
266 // retried requests. | |
267 if (!success) { | |
268 ReportModelStatus(MODEL_STATUS_DOWNLOAD_FAILED); | |
269 return; | |
270 } | |
271 | |
272 // Create a new model instance, parse and validate the data, and move it over | |
273 // to be used by the ranker. | |
274 std::unique_ptr<chrome_intelligence::TranslateRankerModel> new_model( | |
275 new chrome_intelligence::TranslateRankerModel()); | |
276 | |
277 bool is_parseable = new_model->ParseFromString(data); | |
278 if (!is_parseable) { | |
279 ReportModelStatus(MODEL_STATUS_PARSE_FAILED); | |
280 return; | |
281 } | |
282 | |
283 bool is_valid = new_model->has_logistic_regression_model(); | |
284 if (!is_valid) { | |
285 ReportModelStatus(MODEL_STATUS_VALIDATION_FAILED); | |
286 return; | |
287 } | |
288 | |
289 ReportModelStatus(MODEL_STATUS_OK); | |
290 model_ = std::move(new_model); | |
291 model_fetcher_.reset(); | |
292 } | |
293 | |
294 void TranslateRanker::FlushTranslateEvents( | |
295 std::vector<metrics::TranslateEventProto>* translate_events) { | |
296 if (IsLoggingEnabled()) { | |
297 translate_events->swap(translate_events_cache_); | |
298 translate_events_cache_.clear(); | |
299 } | |
300 } | |
301 | |
302 void TranslateRanker::RecordTranslateEvent( | |
303 const metrics::TranslateEventProto& translate_event) { | |
304 if (IsLoggingEnabled()) | |
305 translate_events_cache_.push_back(translate_event); | |
306 } | |
307 | |
308 } // namespace translate | |
OLD | NEW |