OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
groby-ooo-7-16
2017/02/23 00:01:35
Can you convince codereview that this is mostly a copy?
Roger McFarlane (Chromium)
2017/02/23 21:17:56
Sent a file-specific diff out of band.
| |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "components/translate/core/browser/translate_ranker_impl.h" | |
6 | |
7 #include <cmath> | |
8 | |
9 #include "base/bind.h" | |
10 #include "base/bind_helpers.h" | |
11 #include "base/command_line.h" | |
12 #include "base/files/file_path.h" | |
13 #include "base/files/file_util.h" | |
14 #include "base/memory/ptr_util.h" | |
15 #include "base/metrics/histogram_macros.h" | |
16 #include "base/profiler/scoped_tracker.h" | |
17 #include "base/strings/string_number_conversions.h" | |
18 #include "base/strings/string_util.h" | |
19 #include "base/task_runner.h" | |
20 #include "base/threading/thread_task_runner_handle.h" | |
21 #include "components/metrics/proto/translate_event.pb.h" | |
22 #include "components/translate/core/browser/proto/ranker_model.pb.h" | |
23 #include "components/translate/core/browser/proto/translate_ranker_model.pb.h" | |
24 #include "components/translate/core/browser/ranker_model.h" | |
25 #include "components/translate/core/browser/translate_download_manager.h" | |
26 #include "components/translate/core/browser/translate_prefs.h" | |
27 #include "components/translate/core/browser/translate_url_fetcher.h" | |
28 #include "components/translate/core/common/translate_switches.h" | |
29 #include "components/variations/variations_associated_data.h" | |
30 #include "url/gurl.h" | |
31 | |
32 namespace translate { | |
33 | |
34 namespace { | |
35 | |
36 using chrome_intelligence::RankerModel; | |
37 using chrome_intelligence::RankerModelProto; | |
38 using chrome_intelligence::TranslateRankerModel; | |
39 | |
40 const double kTranslationOfferThreshold = 0.5; | |
41 | |
42 const char kTranslateRankerModelFileName[] = "Translate Ranker Model"; | |
43 const char kUmaPrefix[] = "Translate.Ranker"; | |
44 const char kUnknown[] = "UNKNOWN"; | |
45 | |
// Logistic function: maps |x| into the range [0, 1], with Sigmoid(0) == 0.5.
double Sigmoid(double x) {
  // <cmath> only guarantees the std:: overloads; qualify the call instead of
  // relying on a global-namespace ::exp happening to be visible.
  return 1.0 / (1.0 + std::exp(-x));
}
49 | |
// Returns |numerator| divided by |denominator| as a double, or 0.0 when
// |denominator| is zero.
double SafeRatio(int numerator, int denominator) {
  if (denominator == 0)
    return 0.0;
  const double divisor = static_cast<double>(denominator);
  return numerator / divisor;
}
53 | |
54 double ScoreComponent(const google::protobuf::Map<std::string, float>& weights, | |
55 const std::string& key) { | |
56 auto i = weights.find(base::ToLowerASCII(key)); | |
57 if (i == weights.end()) | |
58 i = weights.find(kUnknown); | |
59 return i == weights.end() ? 0.0 : i->second; | |
60 } | |
61 | |
62 RankerModelStatus ValidateModel(const RankerModel& model) { | |
63 if (model.proto().model_case() != RankerModelProto::kTranslate) | |
64 return RankerModelStatus::VALIDATION_FAILED; | |
65 | |
66 if (model.proto().translate().model_revision_case() != | |
67 TranslateRankerModel::kLogisticRegressionModel) { | |
68 return RankerModelStatus::INCOMPATIBLE; | |
69 } | |
70 | |
71 return RankerModelStatus::OK; | |
72 } | |
73 | |
74 } // namespace | |
75 | |
76 const base::Feature kTranslateRankerQuery{"TranslateRankerQuery", | |
77 base::FEATURE_DISABLED_BY_DEFAULT}; | |
78 | |
79 const base::Feature kTranslateRankerEnforcement{ | |
80 "TranslateRankerEnforcement", base::FEATURE_DISABLED_BY_DEFAULT}; | |
81 | |
82 const base::Feature kTranslateRankerLogging{"TranslateRankerLogging", | |
83 base::FEATURE_DISABLED_BY_DEFAULT}; | |
84 | |
85 TranslateRankerFeatures::TranslateRankerFeatures() {} | |
86 | |
87 TranslateRankerFeatures::TranslateRankerFeatures(int accepted, | |
88 int denied, | |
89 int ignored, | |
90 const std::string& src, | |
91 const std::string& dst, | |
92 const std::string& cntry, | |
93 const std::string& locale) | |
94 : accepted_count(accepted), | |
95 denied_count(denied), | |
96 ignored_count(ignored), | |
97 total_count(accepted_count + denied_count + ignored_count), | |
98 src_lang(src), | |
99 dst_lang(dst), | |
100 country(cntry), | |
101 app_locale(locale), | |
102 accepted_ratio(SafeRatio(accepted_count, total_count)), | |
103 denied_ratio(SafeRatio(denied_count, total_count)), | |
104 ignored_ratio(SafeRatio(ignored_count, total_count)) {} | |
105 | |
106 TranslateRankerFeatures::TranslateRankerFeatures(const TranslatePrefs& prefs, | |
107 const std::string& src, | |
108 const std::string& dst, | |
109 const std::string& locale) | |
110 : TranslateRankerFeatures(prefs.GetTranslationAcceptedCount(src), | |
111 prefs.GetTranslationDeniedCount(src), | |
112 prefs.GetTranslationIgnoredCount(src), | |
113 src, | |
114 dst, | |
115 prefs.GetCountry(), | |
116 locale) {} | |
117 | |
118 TranslateRankerFeatures::~TranslateRankerFeatures() {} | |
119 | |
120 void TranslateRankerFeatures::WriteTo(std::ostream& stream) const { | |
121 stream << "src_lang='" << src_lang << "', " | |
122 << "dst_lang='" << dst_lang << "', " | |
123 << "country='" << country << "', " | |
124 << "app_locale='" << app_locale << "', " | |
125 << "accept_count=" << accepted_count << ", " | |
126 << "denied_count=" << denied_count << ", " | |
127 << "ignored_count=" << ignored_count << ", " | |
128 << "total_count=" << total_count << ", " | |
129 << "accept_ratio=" << accepted_ratio << ", " | |
130 << "decline_ratio=" << denied_ratio << ", " | |
131 << "ignore_ratio=" << ignored_ratio; | |
132 } | |
133 | |
134 TranslateRankerImpl::TranslateRankerImpl(const base::FilePath& model_path, | |
135 const GURL& model_url) | |
136 : weak_ptr_factory_(this) { | |
137 model_loader_ = base::MakeUnique<RankerModelLoader>( | |
138 base::Bind(&ValidateModel), | |
139 base::Bind(&TranslateRankerImpl::OnModelAvailable, | |
140 weak_ptr_factory_.GetWeakPtr()), | |
141 model_path, model_url, kUmaPrefix); | |
142 model_loader_->Start(); | |
143 } | |
144 | |
145 TranslateRankerImpl::~TranslateRankerImpl() {} | |
146 | |
147 // static | |
148 base::FilePath TranslateRankerImpl::GetModelPath( | |
149 const base::FilePath& data_dir) { | |
150 if (data_dir.empty()) | |
151 return base::FilePath(); | |
152 | |
153 // Otherwise, look for the file in data dir. | |
154 return data_dir.AppendASCII(kTranslateRankerModelFileName); | |
155 } | |
156 | |
157 // static | |
158 GURL TranslateRankerImpl::GetModelURL() { | |
159 // Allow override of the ranker model URL from the command line. | |
160 base::CommandLine* command_line = base::CommandLine::ForCurrentProcess(); | |
161 if (command_line->HasSwitch(switches::kTranslateRankerModelURL)) { | |
162 return GURL( | |
163 command_line->GetSwitchValueASCII(switches::kTranslateRankerModelURL)); | |
164 } | |
165 | |
166 // Otherwise take the ranker model URL from the ranker query variation. | |
167 const std::string raw_url = variations::GetVariationParamValueByFeature( | |
168 kTranslateRankerQuery, switches::kTranslateRankerModelURL); | |
169 | |
170 DVLOG(3) << switches::kTranslateRankerModelURL << " = " << raw_url; | |
171 | |
172 return GURL(raw_url); | |
173 } | |
174 | |
175 bool TranslateRankerImpl::IsLoggingEnabled() { | |
176 return base::FeatureList::IsEnabled(kTranslateRankerLogging); | |
177 } | |
178 | |
179 bool TranslateRankerImpl::IsQueryEnabled() { | |
180 return base::FeatureList::IsEnabled(kTranslateRankerQuery); | |
181 } | |
182 | |
183 bool TranslateRankerImpl::IsEnforcementEnabled() { | |
184 return base::FeatureList::IsEnabled(kTranslateRankerEnforcement); | |
185 } | |
186 | |
187 int TranslateRankerImpl::GetModelVersion() const { | |
188 return model_ ? model_->proto().translate().version() : 0; | |
189 } | |
190 | |
191 bool TranslateRankerImpl::ShouldOfferTranslation( | |
192 const TranslatePrefs& translate_prefs, | |
193 const std::string& src_lang, | |
194 const std::string& dst_lang) { | |
195 DCHECK(sequence_checker_.CalledOnValidSequence()); | |
196 // The ranker is a gate in the "show a translation prompt" flow. To retain | |
197 // the pre-existing functionality, it defaults to returning true in the | |
198 // absence of a model or if enforcement is disabled. As this is ranker is | |
199 // subsumed into a more general assist ranker, this default will go away | |
200 // (or become False). | |
201 const bool kDefaultResponse = true; | |
202 | |
203 // If we don't have a model, request one and return the default. | |
204 if (model_ == nullptr) { | |
205 return kDefaultResponse; | |
206 } | |
207 | |
208 SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ShouldOfferTranslation"); | |
209 | |
210 // TODO(rogerm): Remove ScopedTracker below once crbug.com/646711 is closed. | |
211 tracked_objects::ScopedTracker tracking_profile( | |
212 FROM_HERE_WITH_EXPLICIT_FUNCTION( | |
213 "646711 translate::TranslateRankerImpl::ShouldOfferTranslation")); | |
214 | |
215 TranslateRankerFeatures features( | |
216 translate_prefs, src_lang, dst_lang, | |
217 TranslateDownloadManager::GetInstance()->application_locale()); | |
218 | |
219 double score = CalculateScore(features); | |
220 | |
221 DVLOG(2) << "TranslateRankerImpl::ShouldOfferTranslation: " | |
222 << "Score = " << score << ", Features=[" << features << "]"; | |
223 | |
224 bool result = (score >= kTranslationOfferThreshold); | |
225 | |
226 UMA_HISTOGRAM_BOOLEAN("Translate.Ranker.QueryResult", result); | |
227 | |
228 return result; | |
229 } | |
230 | |
231 double TranslateRankerImpl::CalculateScore( | |
232 const TranslateRankerFeatures& features) { | |
233 DCHECK(sequence_checker_.CalledOnValidSequence()); | |
234 SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.CalculateScore"); | |
235 DCHECK(model_ != nullptr); | |
236 const TranslateRankerModel::LogisticRegressionModel& logit = | |
237 model_->proto().translate().logistic_regression_model(); | |
238 | |
239 double dot_product = | |
240 (features.accepted_count * logit.accept_count_weight()) + | |
241 (features.denied_count * logit.decline_count_weight()) + | |
242 (features.ignored_count * logit.ignore_count_weight()) + | |
243 (features.accepted_ratio * logit.accept_ratio_weight()) + | |
244 (features.denied_ratio * logit.decline_ratio_weight()) + | |
245 (features.ignored_ratio * logit.ignore_ratio_weight()) + | |
246 ScoreComponent(logit.source_language_weight(), features.src_lang) + | |
247 ScoreComponent(logit.dest_language_weight(), features.dst_lang) + | |
248 ScoreComponent(logit.country_weight(), features.country) + | |
249 ScoreComponent(logit.locale_weight(), features.app_locale); | |
250 | |
251 return Sigmoid(dot_product + logit.bias()); | |
252 } | |
253 | |
254 void TranslateRankerImpl::FlushTranslateEvents( | |
255 std::vector<metrics::TranslateEventProto>* events) { | |
256 DCHECK(sequence_checker_.CalledOnValidSequence()); | |
257 DVLOG(3) << "Flushing translate ranker events."; | |
258 events->swap(event_cache_); | |
259 event_cache_.clear(); | |
260 } | |
261 | |
262 void TranslateRankerImpl::AddTranslateEvent( | |
263 const metrics::TranslateEventProto& event) { | |
264 DCHECK(sequence_checker_.CalledOnValidSequence()); | |
265 DVLOG(3) << "Adding translate ranker event."; | |
266 if (IsLoggingEnabled()) | |
267 event_cache_.push_back(event); | |
268 } | |
269 | |
270 void TranslateRankerImpl::OnModelAvailable(std::unique_ptr<RankerModel> model) { | |
271 DCHECK(sequence_checker_.CalledOnValidSequence()); | |
272 model_ = std::move(model); | |
273 } | |
274 | |
275 } // namespace translate | |
276 | |
277 std::ostream& operator<<(std::ostream& stream, | |
278 const translate::TranslateRankerFeatures& features) { | |
279 features.WriteTo(stream); | |
280 return stream; | |
281 } | |
OLD | NEW |