OLD | NEW |
| (Empty) |
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 // | |
5 // The |FeedbackSender| object stores the user feedback to spellcheck | |
6 // suggestions in a |Feedback| object. | |
7 // | |
8 // When spelling service returns spellcheck results, these results first arrive | |
9 // in |FeedbackSender| to assign hash identifiers for each | |
10 // misspelling-suggestion pair. If the spelling service identifies the same | |
11 // misspelling as already displayed to the user, then |FeedbackSender| reuses | |
12 // the same hash identifiers to avoid duplication. It detects the duplicates by | |
13 // comparing misspelling offsets in text. Spelling service can return duplicates | |
14 // because we request spellcheck for whole paragraphs, as context around a | |
15 // misspelled word is important to the spellcheck algorithm. | |
16 // | |
17 // All feedback is initially pending. When a user acts upon a misspelling such | |
18 // that the misspelling is no longer displayed (red squiggly line goes away), | |
19 // then the feedback for this misspelling is finalized. All finalized feedback | |
20 // is erased after being sent to the spelling service. Pending feedback is kept | |
21 // around for |kSessionHours| hours and then finalized even if user did not act | |
22 // on the misspellings. | |
23 // | |
24 // |FeedbackSender| periodically requests a list of hashes of all remaining | |
25 // misspellings in renderers. When a renderer responds with a list of hashes, | |
26 // |FeedbackSender| uses the list to determine which misspellings are no longer | |
27 // displayed to the user and sends the current state of user feedback to the | |
28 // spelling service. | |
29 | |
30 #include "chrome/browser/spellchecker/feedback_sender.h" | |
31 | |
32 #include <algorithm> | |
33 #include <iterator> | |
34 #include <utility> | |
35 | |
36 #include "base/command_line.h" | |
37 #include "base/hash.h" | |
38 #include "base/json/json_writer.h" | |
39 #include "base/location.h" | |
40 #include "base/metrics/field_trial.h" | |
41 #include "base/single_thread_task_runner.h" | |
42 #include "base/stl_util.h" | |
43 #include "base/strings/string_number_conversions.h" | |
44 #include "base/strings/stringprintf.h" | |
45 #include "base/threading/thread_task_runner_handle.h" | |
46 #include "base/values.h" | |
47 #include "chrome/browser/spellchecker/word_trimmer.h" | |
48 #include "chrome/common/chrome_switches.h" | |
49 #include "chrome/common/spellcheck_common.h" | |
50 #include "chrome/common/spellcheck_marker.h" | |
51 #include "chrome/common/spellcheck_messages.h" | |
52 #include "components/data_use_measurement/core/data_use_user_data.h" | |
53 #include "content/public/browser/render_process_host.h" | |
54 #include "crypto/random.h" | |
55 #include "crypto/secure_hash.h" | |
56 #include "crypto/sha2.h" | |
57 #include "google_apis/google_api_keys.h" | |
58 #include "net/base/load_flags.h" | |
59 #include "net/url_request/url_fetcher.h" | |
60 #include "net/url_request/url_request_context_getter.h" | |
61 | |
62 namespace spellcheck { | |
63 | |
64 namespace { | |
65 | |
// Size limit, in bytes, handed to the |Feedback| store when |FeedbackSender|
// is constructed (10 MB).
// NOTE(review): presumably caps how much feedback is kept in memory; confirm
// against the |Feedback| class.
const size_t kMaxFeedbackSizeBytes = 10 * 1024 * 1024;

// Default endpoint that receives the feedback data.
const char kFeedbackServiceURL[] = "https://www.googleapis.com/rpc";

// Lower bound, in seconds, on the interval between two feedback batches.
const int kMinIntervalSeconds = 5;
73 | |
74 // Returns a hash of |session_start|, the current timestamp, and | |
75 // |suggestion_index|. | |
76 uint32_t BuildHash(const base::Time& session_start, size_t suggestion_index) { | |
77 return base::Hash( | |
78 base::StringPrintf("%" PRId64 "%" PRId64 "%" PRIuS, | |
79 session_start.ToInternalValue(), | |
80 base::Time::Now().ToInternalValue(), | |
81 suggestion_index)); | |
82 } | |
83 | |
84 uint64_t BuildAnonymousHash(const FeedbackSender::RandSalt& r, | |
85 const base::string16& s) { | |
86 std::unique_ptr<crypto::SecureHash> hash( | |
87 crypto::SecureHash::Create(crypto::SecureHash::SHA256)); | |
88 | |
89 hash->Update(s.data(), s.size() * sizeof(s[0])); | |
90 hash->Update(&r, sizeof(r)); | |
91 | |
92 uint64_t result; | |
93 hash->Finish(&result, sizeof(result)); | |
94 return result; | |
95 } | |
96 | |
97 // Returns a pending feedback data structure for the spellcheck |result| and | |
98 // |text|. | |
99 Misspelling BuildFeedback(const SpellCheckResult& result, | |
100 const base::string16& text) { | |
101 size_t start = result.location; | |
102 base::string16 context = TrimWords(&start, | |
103 start + result.length, | |
104 text, | |
105 chrome::spellcheck_common::kContextWordCount); | |
106 return Misspelling(context, | |
107 start, | |
108 result.length, | |
109 std::vector<base::string16>(1, result.replacement), | |
110 result.hash); | |
111 } | |
112 | |
113 // Builds suggestion info from |suggestions|. | |
114 std::unique_ptr<base::ListValue> BuildSuggestionInfo( | |
115 const std::vector<Misspelling>& misspellings, | |
116 bool is_first_feedback_batch, | |
117 const FeedbackSender::RandSalt& salt) { | |
118 std::unique_ptr<base::ListValue> list(new base::ListValue); | |
119 for (const auto& raw_misspelling : misspellings) { | |
120 std::unique_ptr<base::DictionaryValue> misspelling( | |
121 SerializeMisspelling(raw_misspelling)); | |
122 misspelling->SetBoolean("isFirstInSession", is_first_feedback_batch); | |
123 misspelling->SetBoolean("isAutoCorrection", false); | |
124 // hash(R) fields come from red_underline_extensions.proto | |
125 // fixed64 user_misspelling_id = ... | |
126 misspelling->SetString( | |
127 "userMisspellingId", | |
128 base::Uint64ToString(BuildAnonymousHash( | |
129 salt, raw_misspelling.context.substr(raw_misspelling.location, | |
130 raw_misspelling.length)))); | |
131 // repeated fixed64 user_suggestion_id = ... | |
132 std::unique_ptr<base::ListValue> suggestion_list(new base::ListValue()); | |
133 for (const auto& suggestion : raw_misspelling.suggestions) { | |
134 suggestion_list->AppendString( | |
135 base::Uint64ToString(BuildAnonymousHash(salt, suggestion))); | |
136 } | |
137 misspelling->Set("userSuggestionId", suggestion_list.release()); | |
138 list->Append(std::move(misspelling)); | |
139 } | |
140 return list; | |
141 } | |
142 | |
143 // Builds feedback parameters from |suggestion_info|, |language|, and |country|. | |
144 // Takes ownership of |suggestion_list|. | |
145 std::unique_ptr<base::DictionaryValue> BuildParams( | |
146 std::unique_ptr<base::ListValue> suggestion_info, | |
147 const std::string& language, | |
148 const std::string& country) { | |
149 std::unique_ptr<base::DictionaryValue> params(new base::DictionaryValue); | |
150 params->Set("suggestionInfo", suggestion_info.release()); | |
151 params->SetString("key", google_apis::GetAPIKey()); | |
152 params->SetString("language", language); | |
153 params->SetString("originCountry", country); | |
154 params->SetString("clientName", "Chrome"); | |
155 return params; | |
156 } | |
157 | |
158 // Builds feedback data from |params|. Takes ownership of |params|. | |
159 std::unique_ptr<base::Value> BuildFeedbackValue( | |
160 std::unique_ptr<base::DictionaryValue> params, | |
161 const std::string& api_version) { | |
162 std::unique_ptr<base::DictionaryValue> result(new base::DictionaryValue); | |
163 result->Set("params", params.release()); | |
164 result->SetString("method", "spelling.feedback"); | |
165 result->SetString("apiVersion", api_version); | |
166 return std::move(result); | |
167 } | |
168 | |
// Returns true if the misspelling described by |misspelling_location| and
// |misspelling_length| lies entirely within a text of |text_length|
// characters. The location must be non-negative, the length must be positive,
// and the half-open range [location, location + length) must not extend past
// the end of the text.
//
// All range arithmetic is done in size_t and phrased as a subtraction from
// |text_length|. Adding two large int values would overflow, which is
// undefined behaviour for signed integers.
bool IsInBounds(int misspelling_location,
                int misspelling_length,
                size_t text_length) {
  if (misspelling_location < 0 || misspelling_length <= 0)
    return false;

  const size_t location = static_cast<size_t>(misspelling_location);
  const size_t length = static_cast<size_t>(misspelling_length);

  // |location| < |text_length| is checked first so that the subtraction below
  // cannot wrap around.
  return location < text_length && length <= text_length - location;
}
178 | |
179 // Returns the feedback API version. | |
180 std::string GetApiVersion() { | |
181 // This guard is temporary. | |
182 // TODO(rouslan): Remove the guard. http://crbug.com/247726 | |
183 if (base::FieldTrialList::FindFullName(kFeedbackFieldTrialName) == | |
184 kFeedbackFieldTrialEnabledGroupName && | |
185 base::CommandLine::ForCurrentProcess()->HasSwitch( | |
186 switches::kEnableSpellingFeedbackFieldTrial)) { | |
187 return "v2-internal"; | |
188 } | |
189 return "v2"; | |
190 } | |
191 | |
192 } // namespace | |
193 | |
194 FeedbackSender::FeedbackSender(net::URLRequestContextGetter* request_context, | |
195 const std::string& language, | |
196 const std::string& country) | |
197 : request_context_(request_context), | |
198 api_version_(GetApiVersion()), | |
199 language_(language), | |
200 country_(country), | |
201 misspelling_counter_(0), | |
202 feedback_(kMaxFeedbackSizeBytes), | |
203 session_start_(base::Time::Now()), | |
204 feedback_service_url_(kFeedbackServiceURL) { | |
205 // The command-line switch is for testing and temporary. | |
206 // TODO(rouslan): Remove the command-line switch when testing is complete. | |
207 // http://crbug.com/247726 | |
208 if (base::CommandLine::ForCurrentProcess()->HasSwitch( | |
209 switches::kSpellingServiceFeedbackUrl)) { | |
210 feedback_service_url_ = | |
211 GURL(base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII( | |
212 switches::kSpellingServiceFeedbackUrl)); | |
213 } | |
214 } | |
215 | |
216 FeedbackSender::~FeedbackSender() { | |
217 } | |
218 | |
219 void FeedbackSender::SelectedSuggestion(uint32_t hash, int suggestion_index) { | |
220 Misspelling* misspelling = feedback_.GetMisspelling(hash); | |
221 // GetMisspelling() returns null for flushed feedback. Feedback is flushed | |
222 // when the session expires every |kSessionHours| hours. | |
223 if (!misspelling) | |
224 return; | |
225 misspelling->action.set_type(SpellcheckAction::TYPE_SELECT); | |
226 misspelling->action.set_index(suggestion_index); | |
227 misspelling->timestamp = base::Time::Now(); | |
228 } | |
229 | |
230 void FeedbackSender::AddedToDictionary(uint32_t hash) { | |
231 Misspelling* misspelling = feedback_.GetMisspelling(hash); | |
232 // GetMisspelling() returns null for flushed feedback. Feedback is flushed | |
233 // when the session expires every |kSessionHours| hours. | |
234 if (!misspelling) | |
235 return; | |
236 misspelling->action.set_type(SpellcheckAction::TYPE_ADD_TO_DICT); | |
237 misspelling->timestamp = base::Time::Now(); | |
238 const std::set<uint32_t>& hashes = | |
239 feedback_.FindMisspellings(GetMisspelledString(*misspelling)); | |
240 for (uint32_t hash : hashes) { | |
241 Misspelling* duplicate_misspelling = feedback_.GetMisspelling(hash); | |
242 if (!duplicate_misspelling || duplicate_misspelling->action.IsFinal()) | |
243 continue; | |
244 duplicate_misspelling->action.set_type(SpellcheckAction::TYPE_ADD_TO_DICT); | |
245 duplicate_misspelling->timestamp = misspelling->timestamp; | |
246 } | |
247 } | |
248 | |
249 void FeedbackSender::RecordInDictionary(uint32_t hash) { | |
250 Misspelling* misspelling = feedback_.GetMisspelling(hash); | |
251 // GetMisspelling() returns null for flushed feedback. Feedback is flushed | |
252 // when the session expires every |kSessionHours| hours. | |
253 if (!misspelling) | |
254 return; | |
255 misspelling->action.set_type(SpellcheckAction::TYPE_IN_DICTIONARY); | |
256 } | |
257 | |
258 void FeedbackSender::IgnoredSuggestions(uint32_t hash) { | |
259 Misspelling* misspelling = feedback_.GetMisspelling(hash); | |
260 // GetMisspelling() returns null for flushed feedback. Feedback is flushed | |
261 // when the session expires every |kSessionHours| hours. | |
262 if (!misspelling) | |
263 return; | |
264 misspelling->action.set_type(SpellcheckAction::TYPE_PENDING_IGNORE); | |
265 misspelling->timestamp = base::Time::Now(); | |
266 } | |
267 | |
268 void FeedbackSender::ManuallyCorrected(uint32_t hash, | |
269 const base::string16& correction) { | |
270 Misspelling* misspelling = feedback_.GetMisspelling(hash); | |
271 // GetMisspelling() returns null for flushed feedback. Feedback is flushed | |
272 // when the session expires every |kSessionHours| hours. | |
273 if (!misspelling) | |
274 return; | |
275 misspelling->action.set_type(SpellcheckAction::TYPE_MANUALLY_CORRECTED); | |
276 misspelling->action.set_value(correction); | |
277 misspelling->timestamp = base::Time::Now(); | |
278 } | |
279 | |
280 void FeedbackSender::OnReceiveDocumentMarkers( | |
281 int renderer_process_id, | |
282 const std::vector<uint32_t>& markers) { | |
283 if ((base::Time::Now() - session_start_).InHours() >= | |
284 chrome::spellcheck_common::kSessionHours) { | |
285 FlushFeedback(); | |
286 return; | |
287 } | |
288 | |
289 if (!feedback_.RendererHasMisspellings(renderer_process_id)) | |
290 return; | |
291 | |
292 feedback_.FinalizeRemovedMisspellings(renderer_process_id, markers); | |
293 SendFeedback(feedback_.GetMisspellingsInRenderer(renderer_process_id), | |
294 !renderers_sent_feedback_.count(renderer_process_id)); | |
295 renderers_sent_feedback_.insert(renderer_process_id); | |
296 feedback_.EraseFinalizedMisspellings(renderer_process_id); | |
297 } | |
298 | |
299 void FeedbackSender::OnSpellcheckResults( | |
300 int renderer_process_id, | |
301 const base::string16& text, | |
302 const std::vector<SpellCheckMarker>& markers, | |
303 std::vector<SpellCheckResult>* results) { | |
304 // Don't collect feedback if not going to send it. | |
305 if (!timer_.IsRunning()) | |
306 return; | |
307 | |
308 // Generate a map of marker offsets to marker hashes. This map helps to | |
309 // efficiently lookup feedback data based on the position of the misspelling | |
310 // in text. | |
311 typedef std::map<size_t, uint32_t> MarkerMap; | |
312 MarkerMap marker_map; | |
313 for (size_t i = 0; i < markers.size(); ++i) | |
314 marker_map[markers[i].offset] = markers[i].hash; | |
315 | |
316 for (auto& result : *results) { | |
317 if (!IsInBounds(result.location, result.length, text.length())) | |
318 continue; | |
319 MarkerMap::const_iterator marker_it = marker_map.find(result.location); | |
320 if (marker_it != marker_map.end() && | |
321 feedback_.HasMisspelling(marker_it->second)) { | |
322 // If the renderer already has a marker for this spellcheck result, then | |
323 // set the hash of the spellcheck result to be the same as the marker. | |
324 result.hash = marker_it->second; | |
325 } else { | |
326 // If the renderer does not yet have a marker for this spellcheck result, | |
327 // then generate a new hash for the spellcheck result. | |
328 result.hash = BuildHash(session_start_, ++misspelling_counter_); | |
329 } | |
330 // Save the feedback data for the spellcheck result. | |
331 feedback_.AddMisspelling(renderer_process_id, BuildFeedback(result, text)); | |
332 } | |
333 } | |
334 | |
335 void FeedbackSender::OnLanguageCountryChange(const std::string& language, | |
336 const std::string& country) { | |
337 FlushFeedback(); | |
338 language_ = language; | |
339 country_ = country; | |
340 } | |
341 | |
342 void FeedbackSender::StartFeedbackCollection() { | |
343 if (timer_.IsRunning()) | |
344 return; | |
345 | |
346 int interval_seconds = chrome::spellcheck_common::kFeedbackIntervalSeconds; | |
347 // This command-line switch is for testing and temporary. | |
348 // TODO(rouslan): Remove the command-line switch when testing is complete. | |
349 // http://crbug.com/247726 | |
350 if (base::CommandLine::ForCurrentProcess()->HasSwitch( | |
351 switches::kSpellingServiceFeedbackIntervalSeconds)) { | |
352 base::StringToInt( | |
353 base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII( | |
354 switches::kSpellingServiceFeedbackIntervalSeconds), | |
355 &interval_seconds); | |
356 if (interval_seconds < kMinIntervalSeconds) | |
357 interval_seconds = kMinIntervalSeconds; | |
358 static const int kSessionSeconds = | |
359 chrome::spellcheck_common::kSessionHours * 60 * 60; | |
360 if (interval_seconds > kSessionSeconds) | |
361 interval_seconds = kSessionSeconds; | |
362 } | |
363 timer_.Start(FROM_HERE, | |
364 base::TimeDelta::FromSeconds(interval_seconds), | |
365 this, | |
366 &FeedbackSender::RequestDocumentMarkers); | |
367 } | |
368 | |
369 void FeedbackSender::StopFeedbackCollection() { | |
370 if (!timer_.IsRunning()) | |
371 return; | |
372 | |
373 FlushFeedback(); | |
374 timer_.Stop(); | |
375 } | |
376 | |
377 void FeedbackSender::RandBytes(void* p, size_t len) { | |
378 crypto::RandBytes(p, len); | |
379 } | |
380 | |
381 void FeedbackSender::OnURLFetchComplete(const net::URLFetcher* source) { | |
382 for (ScopedVector<net::URLFetcher>::iterator sender_it = senders_.begin(); | |
383 sender_it != senders_.end(); | |
384 ++sender_it) { | |
385 if (*sender_it == source) { | |
386 senders_.erase(sender_it); | |
387 return; | |
388 } | |
389 } | |
390 delete source; | |
391 } | |
392 | |
393 void FeedbackSender::RequestDocumentMarkers() { | |
394 // Request document markers from all the renderers that are still alive. | |
395 std::set<int> alive_renderers; | |
396 for (content::RenderProcessHost::iterator it( | |
397 content::RenderProcessHost::AllHostsIterator()); | |
398 !it.IsAtEnd(); | |
399 it.Advance()) { | |
400 alive_renderers.insert(it.GetCurrentValue()->GetID()); | |
401 it.GetCurrentValue()->Send(new SpellCheckMsg_RequestDocumentMarkers()); | |
402 } | |
403 | |
404 // Asynchronously send out the feedback for all the renderers that are no | |
405 // longer alive. | |
406 std::vector<int> known_renderers = feedback_.GetRendersWithMisspellings(); | |
407 std::sort(known_renderers.begin(), known_renderers.end()); | |
408 std::vector<int> dead_renderers = | |
409 base::STLSetDifference<std::vector<int>>(known_renderers, | |
410 alive_renderers); | |
411 for (int renderer_process_id : dead_renderers) { | |
412 base::ThreadTaskRunnerHandle::Get()->PostTask( | |
413 FROM_HERE, | |
414 base::Bind(&FeedbackSender::OnReceiveDocumentMarkers, AsWeakPtr(), | |
415 renderer_process_id, std::vector<uint32_t>())); | |
416 } | |
417 } | |
418 | |
419 void FeedbackSender::FlushFeedback() { | |
420 if (feedback_.Empty()) | |
421 return; | |
422 feedback_.FinalizeAllMisspellings(); | |
423 SendFeedback(feedback_.GetAllMisspellings(), | |
424 renderers_sent_feedback_.empty()); | |
425 feedback_.Clear(); | |
426 renderers_sent_feedback_.clear(); | |
427 session_start_ = base::Time::Now(); | |
428 timer_.Reset(); | |
429 } | |
430 | |
431 void FeedbackSender::SendFeedback(const std::vector<Misspelling>& feedback_data, | |
432 bool is_first_feedback_batch) { | |
433 if (base::Time::Now() - last_salt_update_ > base::TimeDelta::FromHours(24)) { | |
434 RandBytes(&salt_, sizeof(salt_)); | |
435 last_salt_update_ = base::Time::Now(); | |
436 } | |
437 std::unique_ptr<base::Value> feedback_value(BuildFeedbackValue( | |
438 BuildParams( | |
439 BuildSuggestionInfo(feedback_data, is_first_feedback_batch, salt_), | |
440 language_, country_), | |
441 api_version_)); | |
442 std::string feedback; | |
443 base::JSONWriter::Write(*feedback_value, &feedback); | |
444 | |
445 // The tests use this identifier to mock the URL fetcher. | |
446 static const int kUrlFetcherId = 0; | |
447 net::URLFetcher* sender = | |
448 net::URLFetcher::Create(kUrlFetcherId, feedback_service_url_, | |
449 net::URLFetcher::POST, this).release(); | |
450 data_use_measurement::DataUseUserData::AttachToFetcher( | |
451 sender, data_use_measurement::DataUseUserData::SPELL_CHECKER); | |
452 sender->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES | | |
453 net::LOAD_DO_NOT_SAVE_COOKIES); | |
454 sender->SetUploadData("application/json", feedback); | |
455 senders_.push_back(sender); | |
456 | |
457 // Request context is nullptr in testing. | |
458 if (request_context_.get()) { | |
459 sender->SetRequestContext(request_context_.get()); | |
460 sender->Start(); | |
461 } | |
462 } | |
463 | |
464 } // namespace spellcheck | |
OLD | NEW |