OLD | NEW |
| (Empty) |
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 // | |
5 // The |FeedbackSender| object stores the user feedback to spellcheck | |
6 // suggestions in a |Feedback| object. | |
7 // | |
8 // When spelling service returns spellcheck results, these results first arrive | |
9 // in |FeedbackSender| to assign hash identifiers for each | |
10 // misspelling-suggestion pair. If the spelling service identifies the same | |
11 // misspelling as already displayed to the user, then |FeedbackSender| reuses | |
12 // the same hash identifiers to avoid duplication. It detects the duplicates by | |
13 // comparing misspelling offsets in text. Spelling service can return duplicates | |
14 // because we request spellcheck for whole paragraphs, as context around a | |
15 // misspelled word is important to the spellcheck algorithm. | |
16 // | |
17 // All feedback is initially pending. When a user acts upon a misspelling such | |
18 // that the misspelling is no longer displayed (red squiggly line goes away), | |
19 // then the feedback for this misspelling is finalized. All finalized feedback | |
20 // is erased after being sent to the spelling service. Pending feedback is kept | |
21 // around for |kSessionHours| hours and then finalized even if user did not act | |
22 // on the misspellings. | |
23 // | |
24 // |FeedbackSender| periodically requests a list of hashes of all remaining | |
25 // misspellings in renderers. When a renderer responds with a list of hashes, | |
26 // |FeedbackSender| uses the list to determine which misspellings are no longer | |
27 // displayed to the user and sends the current state of user feedback to the | |
28 // spelling service. | |
29 | |
30 #include "chrome/browser/spellchecker/feedback_sender.h" | |
31 | |
32 #include <algorithm> | |
33 #include <iterator> | |
34 #include <utility> | |
35 | |
36 #include "base/command_line.h" | |
37 #include "base/hash.h" | |
38 #include "base/json/json_writer.h" | |
39 #include "base/location.h" | |
40 #include "base/metrics/field_trial.h" | |
41 #include "base/single_thread_task_runner.h" | |
42 #include "base/stl_util.h" | |
43 #include "base/strings/string_number_conversions.h" | |
44 #include "base/strings/stringprintf.h" | |
45 #include "base/threading/thread_task_runner_handle.h" | |
46 #include "base/values.h" | |
47 #include "chrome/browser/spellchecker/word_trimmer.h" | |
48 #include "components/data_use_measurement/core/data_use_user_data.h" | |
49 #include "components/spellcheck/common/spellcheck_common.h" | |
50 #include "components/spellcheck/common/spellcheck_marker.h" | |
51 #include "components/spellcheck/common/spellcheck_messages.h" | |
52 #include "components/spellcheck/common/spellcheck_switches.h" | |
53 #include "content/public/browser/render_process_host.h" | |
54 #include "crypto/random.h" | |
55 #include "crypto/secure_hash.h" | |
56 #include "crypto/sha2.h" | |
57 #include "google_apis/google_api_keys.h" | |
58 #include "net/base/load_flags.h" | |
59 #include "net/url_request/url_fetcher.h" | |
60 #include "net/url_request/url_request_context_getter.h" | |
61 | |
62 namespace spellcheck { | |
63 | |
64 namespace { | |
65 | |
// Size limit, in bytes, handed to the |Feedback| store on construction
// (10 MB).
constexpr size_t kMaxFeedbackSizeBytes = 10 * 1024 * 1024;

// Default endpoint that receives the feedback data.
constexpr char kFeedbackServiceURL[] = "https://www.googleapis.com/rpc";

// Lower bound, in seconds, on the interval between feedback batches.
constexpr int kMinIntervalSeconds = 5;
73 | |
74 // Returns a hash of |session_start|, the current timestamp, and | |
75 // |suggestion_index|. | |
76 uint32_t BuildHash(const base::Time& session_start, size_t suggestion_index) { | |
77 return base::Hash( | |
78 base::StringPrintf("%" PRId64 "%" PRId64 "%" PRIuS, | |
79 session_start.ToInternalValue(), | |
80 base::Time::Now().ToInternalValue(), | |
81 suggestion_index)); | |
82 } | |
83 | |
84 uint64_t BuildAnonymousHash(const FeedbackSender::RandSalt& r, | |
85 const base::string16& s) { | |
86 std::unique_ptr<crypto::SecureHash> hash( | |
87 crypto::SecureHash::Create(crypto::SecureHash::SHA256)); | |
88 | |
89 hash->Update(s.data(), s.size() * sizeof(s[0])); | |
90 hash->Update(&r, sizeof(r)); | |
91 | |
92 uint64_t result; | |
93 hash->Finish(&result, sizeof(result)); | |
94 return result; | |
95 } | |
96 | |
97 // Returns a pending feedback data structure for the spellcheck |result| and | |
98 // |text|. | |
99 Misspelling BuildFeedback(const SpellCheckResult& result, | |
100 const base::string16& text) { | |
101 size_t start = result.location; | |
102 base::string16 context = TrimWords(&start, start + result.length, text, | |
103 spellcheck::kContextWordCount); | |
104 return Misspelling(context, | |
105 start, | |
106 result.length, | |
107 std::vector<base::string16>(1, result.replacement), | |
108 result.hash); | |
109 } | |
110 | |
111 // Builds suggestion info from |suggestions|. | |
112 std::unique_ptr<base::ListValue> BuildSuggestionInfo( | |
113 const std::vector<Misspelling>& misspellings, | |
114 bool is_first_feedback_batch, | |
115 const FeedbackSender::RandSalt& salt) { | |
116 std::unique_ptr<base::ListValue> list(new base::ListValue); | |
117 for (const auto& raw_misspelling : misspellings) { | |
118 std::unique_ptr<base::DictionaryValue> misspelling( | |
119 SerializeMisspelling(raw_misspelling)); | |
120 misspelling->SetBoolean("isFirstInSession", is_first_feedback_batch); | |
121 misspelling->SetBoolean("isAutoCorrection", false); | |
122 // hash(R) fields come from red_underline_extensions.proto | |
123 // fixed64 user_misspelling_id = ... | |
124 misspelling->SetString( | |
125 "userMisspellingId", | |
126 base::Uint64ToString(BuildAnonymousHash( | |
127 salt, raw_misspelling.context.substr(raw_misspelling.location, | |
128 raw_misspelling.length)))); | |
129 // repeated fixed64 user_suggestion_id = ... | |
130 std::unique_ptr<base::ListValue> suggestion_list(new base::ListValue()); | |
131 for (const auto& suggestion : raw_misspelling.suggestions) { | |
132 suggestion_list->AppendString( | |
133 base::Uint64ToString(BuildAnonymousHash(salt, suggestion))); | |
134 } | |
135 misspelling->Set("userSuggestionId", suggestion_list.release()); | |
136 list->Append(std::move(misspelling)); | |
137 } | |
138 return list; | |
139 } | |
140 | |
141 // Builds feedback parameters from |suggestion_info|, |language|, and |country|. | |
142 // Takes ownership of |suggestion_list|. | |
143 std::unique_ptr<base::DictionaryValue> BuildParams( | |
144 std::unique_ptr<base::ListValue> suggestion_info, | |
145 const std::string& language, | |
146 const std::string& country) { | |
147 std::unique_ptr<base::DictionaryValue> params(new base::DictionaryValue); | |
148 params->Set("suggestionInfo", suggestion_info.release()); | |
149 params->SetString("key", google_apis::GetAPIKey()); | |
150 params->SetString("language", language); | |
151 params->SetString("originCountry", country); | |
152 params->SetString("clientName", "Chrome"); | |
153 return params; | |
154 } | |
155 | |
156 // Builds feedback data from |params|. Takes ownership of |params|. | |
157 std::unique_ptr<base::Value> BuildFeedbackValue( | |
158 std::unique_ptr<base::DictionaryValue> params, | |
159 const std::string& api_version) { | |
160 std::unique_ptr<base::DictionaryValue> result(new base::DictionaryValue); | |
161 result->Set("params", params.release()); | |
162 result->SetString("method", "spelling.feedback"); | |
163 result->SetString("apiVersion", api_version); | |
164 return std::move(result); | |
165 } | |
166 | |
// Returns true if the misspelling location is within text bounds, i.e. the
// range [misspelling_location, misspelling_location + misspelling_length) is
// non-empty and lies entirely inside a text of |text_length| characters.
//
// |misspelling_location| must be non-negative and |misspelling_length| must
// be positive; any other combination is reported as out of bounds.
bool IsInBounds(int misspelling_location,
                int misspelling_length,
                size_t text_length) {
  if (misspelling_location < 0 || misspelling_length <= 0)
    return false;

  const size_t location = static_cast<size_t>(misspelling_location);
  if (location >= text_length)
    return false;

  // Compare the length against the space remaining after |location| instead
  // of computing |misspelling_location + misspelling_length| in int, which
  // overflows (undefined behavior) for large inputs. The subtraction cannot
  // wrap because |location| < |text_length| here.
  return static_cast<size_t>(misspelling_length) <= text_length - location;
}
176 | |
177 // Returns the feedback API version. | |
178 std::string GetApiVersion() { | |
179 // This guard is temporary. | |
180 // TODO(rouslan): Remove the guard. http://crbug.com/247726 | |
181 if (base::FieldTrialList::FindFullName(kFeedbackFieldTrialName) == | |
182 kFeedbackFieldTrialEnabledGroupName && | |
183 base::CommandLine::ForCurrentProcess()->HasSwitch( | |
184 spellcheck::switches::kEnableSpellingFeedbackFieldTrial)) { | |
185 return "v2-internal"; | |
186 } | |
187 return "v2"; | |
188 } | |
189 | |
190 } // namespace | |
191 | |
192 FeedbackSender::FeedbackSender(net::URLRequestContextGetter* request_context, | |
193 const std::string& language, | |
194 const std::string& country) | |
195 : request_context_(request_context), | |
196 api_version_(GetApiVersion()), | |
197 language_(language), | |
198 country_(country), | |
199 misspelling_counter_(0), | |
200 feedback_(kMaxFeedbackSizeBytes), | |
201 session_start_(base::Time::Now()), | |
202 feedback_service_url_(kFeedbackServiceURL) { | |
203 // The command-line switch is for testing and temporary. | |
204 // TODO(rouslan): Remove the command-line switch when testing is complete. | |
205 // http://crbug.com/247726 | |
206 if (base::CommandLine::ForCurrentProcess()->HasSwitch( | |
207 spellcheck::switches::kSpellingServiceFeedbackUrl)) { | |
208 feedback_service_url_ = | |
209 GURL(base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII( | |
210 spellcheck::switches::kSpellingServiceFeedbackUrl)); | |
211 } | |
212 } | |
213 | |
214 FeedbackSender::~FeedbackSender() { | |
215 } | |
216 | |
217 void FeedbackSender::SelectedSuggestion(uint32_t hash, int suggestion_index) { | |
218 Misspelling* misspelling = feedback_.GetMisspelling(hash); | |
219 // GetMisspelling() returns null for flushed feedback. Feedback is flushed | |
220 // when the session expires every |kSessionHours| hours. | |
221 if (!misspelling) | |
222 return; | |
223 misspelling->action.set_type(SpellcheckAction::TYPE_SELECT); | |
224 misspelling->action.set_index(suggestion_index); | |
225 misspelling->timestamp = base::Time::Now(); | |
226 } | |
227 | |
228 void FeedbackSender::AddedToDictionary(uint32_t hash) { | |
229 Misspelling* misspelling = feedback_.GetMisspelling(hash); | |
230 // GetMisspelling() returns null for flushed feedback. Feedback is flushed | |
231 // when the session expires every |kSessionHours| hours. | |
232 if (!misspelling) | |
233 return; | |
234 misspelling->action.set_type(SpellcheckAction::TYPE_ADD_TO_DICT); | |
235 misspelling->timestamp = base::Time::Now(); | |
236 const std::set<uint32_t>& hashes = | |
237 feedback_.FindMisspellings(GetMisspelledString(*misspelling)); | |
238 for (uint32_t hash : hashes) { | |
239 Misspelling* duplicate_misspelling = feedback_.GetMisspelling(hash); | |
240 if (!duplicate_misspelling || duplicate_misspelling->action.IsFinal()) | |
241 continue; | |
242 duplicate_misspelling->action.set_type(SpellcheckAction::TYPE_ADD_TO_DICT); | |
243 duplicate_misspelling->timestamp = misspelling->timestamp; | |
244 } | |
245 } | |
246 | |
247 void FeedbackSender::RecordInDictionary(uint32_t hash) { | |
248 Misspelling* misspelling = feedback_.GetMisspelling(hash); | |
249 // GetMisspelling() returns null for flushed feedback. Feedback is flushed | |
250 // when the session expires every |kSessionHours| hours. | |
251 if (!misspelling) | |
252 return; | |
253 misspelling->action.set_type(SpellcheckAction::TYPE_IN_DICTIONARY); | |
254 } | |
255 | |
256 void FeedbackSender::IgnoredSuggestions(uint32_t hash) { | |
257 Misspelling* misspelling = feedback_.GetMisspelling(hash); | |
258 // GetMisspelling() returns null for flushed feedback. Feedback is flushed | |
259 // when the session expires every |kSessionHours| hours. | |
260 if (!misspelling) | |
261 return; | |
262 misspelling->action.set_type(SpellcheckAction::TYPE_PENDING_IGNORE); | |
263 misspelling->timestamp = base::Time::Now(); | |
264 } | |
265 | |
266 void FeedbackSender::ManuallyCorrected(uint32_t hash, | |
267 const base::string16& correction) { | |
268 Misspelling* misspelling = feedback_.GetMisspelling(hash); | |
269 // GetMisspelling() returns null for flushed feedback. Feedback is flushed | |
270 // when the session expires every |kSessionHours| hours. | |
271 if (!misspelling) | |
272 return; | |
273 misspelling->action.set_type(SpellcheckAction::TYPE_MANUALLY_CORRECTED); | |
274 misspelling->action.set_value(correction); | |
275 misspelling->timestamp = base::Time::Now(); | |
276 } | |
277 | |
278 void FeedbackSender::OnReceiveDocumentMarkers( | |
279 int renderer_process_id, | |
280 const std::vector<uint32_t>& markers) { | |
281 if ((base::Time::Now() - session_start_).InHours() >= | |
282 spellcheck::kSessionHours) { | |
283 FlushFeedback(); | |
284 return; | |
285 } | |
286 | |
287 if (!feedback_.RendererHasMisspellings(renderer_process_id)) | |
288 return; | |
289 | |
290 feedback_.FinalizeRemovedMisspellings(renderer_process_id, markers); | |
291 SendFeedback(feedback_.GetMisspellingsInRenderer(renderer_process_id), | |
292 !renderers_sent_feedback_.count(renderer_process_id)); | |
293 renderers_sent_feedback_.insert(renderer_process_id); | |
294 feedback_.EraseFinalizedMisspellings(renderer_process_id); | |
295 } | |
296 | |
297 void FeedbackSender::OnSpellcheckResults( | |
298 int renderer_process_id, | |
299 const base::string16& text, | |
300 const std::vector<SpellCheckMarker>& markers, | |
301 std::vector<SpellCheckResult>* results) { | |
302 // Don't collect feedback if not going to send it. | |
303 if (!timer_.IsRunning()) | |
304 return; | |
305 | |
306 // Generate a map of marker offsets to marker hashes. This map helps to | |
307 // efficiently lookup feedback data based on the position of the misspelling | |
308 // in text. | |
309 typedef std::map<size_t, uint32_t> MarkerMap; | |
310 MarkerMap marker_map; | |
311 for (size_t i = 0; i < markers.size(); ++i) | |
312 marker_map[markers[i].offset] = markers[i].hash; | |
313 | |
314 for (auto& result : *results) { | |
315 if (!IsInBounds(result.location, result.length, text.length())) | |
316 continue; | |
317 MarkerMap::const_iterator marker_it = marker_map.find(result.location); | |
318 if (marker_it != marker_map.end() && | |
319 feedback_.HasMisspelling(marker_it->second)) { | |
320 // If the renderer already has a marker for this spellcheck result, then | |
321 // set the hash of the spellcheck result to be the same as the marker. | |
322 result.hash = marker_it->second; | |
323 } else { | |
324 // If the renderer does not yet have a marker for this spellcheck result, | |
325 // then generate a new hash for the spellcheck result. | |
326 result.hash = BuildHash(session_start_, ++misspelling_counter_); | |
327 } | |
328 // Save the feedback data for the spellcheck result. | |
329 feedback_.AddMisspelling(renderer_process_id, BuildFeedback(result, text)); | |
330 } | |
331 } | |
332 | |
333 void FeedbackSender::OnLanguageCountryChange(const std::string& language, | |
334 const std::string& country) { | |
335 FlushFeedback(); | |
336 language_ = language; | |
337 country_ = country; | |
338 } | |
339 | |
340 void FeedbackSender::StartFeedbackCollection() { | |
341 if (timer_.IsRunning()) | |
342 return; | |
343 | |
344 int interval_seconds = spellcheck::kFeedbackIntervalSeconds; | |
345 // This command-line switch is for testing and temporary. | |
346 // TODO(rouslan): Remove the command-line switch when testing is complete. | |
347 // http://crbug.com/247726 | |
348 if (base::CommandLine::ForCurrentProcess()->HasSwitch( | |
349 spellcheck::switches::kSpellingServiceFeedbackIntervalSeconds)) { | |
350 base::StringToInt( | |
351 base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII( | |
352 spellcheck::switches::kSpellingServiceFeedbackIntervalSeconds), | |
353 &interval_seconds); | |
354 if (interval_seconds < kMinIntervalSeconds) | |
355 interval_seconds = kMinIntervalSeconds; | |
356 static const int kSessionSeconds = spellcheck::kSessionHours * 60 * 60; | |
357 if (interval_seconds > kSessionSeconds) | |
358 interval_seconds = kSessionSeconds; | |
359 } | |
360 timer_.Start(FROM_HERE, | |
361 base::TimeDelta::FromSeconds(interval_seconds), | |
362 this, | |
363 &FeedbackSender::RequestDocumentMarkers); | |
364 } | |
365 | |
366 void FeedbackSender::StopFeedbackCollection() { | |
367 if (!timer_.IsRunning()) | |
368 return; | |
369 | |
370 FlushFeedback(); | |
371 timer_.Stop(); | |
372 } | |
373 | |
374 void FeedbackSender::RandBytes(void* p, size_t len) { | |
375 crypto::RandBytes(p, len); | |
376 } | |
377 | |
378 void FeedbackSender::OnURLFetchComplete(const net::URLFetcher* source) { | |
379 for (ScopedVector<net::URLFetcher>::iterator sender_it = senders_.begin(); | |
380 sender_it != senders_.end(); | |
381 ++sender_it) { | |
382 if (*sender_it == source) { | |
383 senders_.erase(sender_it); | |
384 return; | |
385 } | |
386 } | |
387 delete source; | |
388 } | |
389 | |
390 void FeedbackSender::RequestDocumentMarkers() { | |
391 // Request document markers from all the renderers that are still alive. | |
392 std::set<int> alive_renderers; | |
393 for (content::RenderProcessHost::iterator it( | |
394 content::RenderProcessHost::AllHostsIterator()); | |
395 !it.IsAtEnd(); | |
396 it.Advance()) { | |
397 alive_renderers.insert(it.GetCurrentValue()->GetID()); | |
398 it.GetCurrentValue()->Send(new SpellCheckMsg_RequestDocumentMarkers()); | |
399 } | |
400 | |
401 // Asynchronously send out the feedback for all the renderers that are no | |
402 // longer alive. | |
403 std::vector<int> known_renderers = feedback_.GetRendersWithMisspellings(); | |
404 std::sort(known_renderers.begin(), known_renderers.end()); | |
405 std::vector<int> dead_renderers = | |
406 base::STLSetDifference<std::vector<int>>(known_renderers, | |
407 alive_renderers); | |
408 for (int renderer_process_id : dead_renderers) { | |
409 base::ThreadTaskRunnerHandle::Get()->PostTask( | |
410 FROM_HERE, | |
411 base::Bind(&FeedbackSender::OnReceiveDocumentMarkers, AsWeakPtr(), | |
412 renderer_process_id, std::vector<uint32_t>())); | |
413 } | |
414 } | |
415 | |
416 void FeedbackSender::FlushFeedback() { | |
417 if (feedback_.Empty()) | |
418 return; | |
419 feedback_.FinalizeAllMisspellings(); | |
420 SendFeedback(feedback_.GetAllMisspellings(), | |
421 renderers_sent_feedback_.empty()); | |
422 feedback_.Clear(); | |
423 renderers_sent_feedback_.clear(); | |
424 session_start_ = base::Time::Now(); | |
425 timer_.Reset(); | |
426 } | |
427 | |
428 void FeedbackSender::SendFeedback(const std::vector<Misspelling>& feedback_data, | |
429 bool is_first_feedback_batch) { | |
430 if (base::Time::Now() - last_salt_update_ > base::TimeDelta::FromHours(24)) { | |
431 RandBytes(&salt_, sizeof(salt_)); | |
432 last_salt_update_ = base::Time::Now(); | |
433 } | |
434 std::unique_ptr<base::Value> feedback_value(BuildFeedbackValue( | |
435 BuildParams( | |
436 BuildSuggestionInfo(feedback_data, is_first_feedback_batch, salt_), | |
437 language_, country_), | |
438 api_version_)); | |
439 std::string feedback; | |
440 base::JSONWriter::Write(*feedback_value, &feedback); | |
441 | |
442 // The tests use this identifier to mock the URL fetcher. | |
443 static const int kUrlFetcherId = 0; | |
444 net::URLFetcher* sender = | |
445 net::URLFetcher::Create(kUrlFetcherId, feedback_service_url_, | |
446 net::URLFetcher::POST, this).release(); | |
447 data_use_measurement::DataUseUserData::AttachToFetcher( | |
448 sender, data_use_measurement::DataUseUserData::SPELL_CHECKER); | |
449 sender->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES | | |
450 net::LOAD_DO_NOT_SAVE_COOKIES); | |
451 sender->SetUploadData("application/json", feedback); | |
452 senders_.push_back(sender); | |
453 | |
454 // Request context is nullptr in testing. | |
455 if (request_context_.get()) { | |
456 sender->SetRequestContext(request_context_.get()); | |
457 sender->Start(); | |
458 } | |
459 } | |
460 | |
461 } // namespace spellcheck | |
OLD | NEW |