Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(292)

Side by Side Diff: chrome/browser/spellchecker/feedback_sender.cc

Issue 2159283003: [WIP][DO NOT LAND] Componentize spellcheck Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // The |FeedbackSender| object stores the user feedback to spellcheck
6 // suggestions in a |Feedback| object.
7 //
8 // When the spelling service returns spellcheck results, these results first arrive
9 // in |FeedbackSender| to assign hash identifiers for each
10 // misspelling-suggestion pair. If the spelling service identifies the same
11 // misspelling as already displayed to the user, then |FeedbackSender| reuses
12 // the same hash identifiers to avoid duplication. It detects the duplicates by
13 // comparing misspelling offsets in text. Spelling service can return duplicates
14 // because we request spellcheck for whole paragraphs, as context around a
15 // misspelled word is important to the spellcheck algorithm.
16 //
17 // All feedback is initially pending. When a user acts upon a misspelling such
18 // that the misspelling is no longer displayed (red squiggly line goes away),
19 // then the feedback for this misspelling is finalized. All finalized feedback
20 // is erased after being sent to the spelling service. Pending feedback is kept
21 // around for |kSessionHours| hours and then finalized even if the user did not act
22 // on the misspellings.
23 //
24 // |FeedbackSender| periodically requests a list of hashes of all remaining
25 // misspellings in renderers. When a renderer responds with a list of hashes,
26 // |FeedbackSender| uses the list to determine which misspellings are no longer
27 // displayed to the user and sends the current state of user feedback to the
28 // spelling service.
29
30 #include "chrome/browser/spellchecker/feedback_sender.h"
31
32 #include <algorithm>
33 #include <iterator>
34 #include <utility>
35
36 #include "base/command_line.h"
37 #include "base/hash.h"
38 #include "base/json/json_writer.h"
39 #include "base/location.h"
40 #include "base/metrics/field_trial.h"
41 #include "base/single_thread_task_runner.h"
42 #include "base/stl_util.h"
43 #include "base/strings/string_number_conversions.h"
44 #include "base/strings/stringprintf.h"
45 #include "base/threading/thread_task_runner_handle.h"
46 #include "base/values.h"
47 #include "chrome/browser/spellchecker/word_trimmer.h"
48 #include "chrome/common/chrome_switches.h"
49 #include "chrome/common/spellcheck_common.h"
50 #include "chrome/common/spellcheck_marker.h"
51 #include "chrome/common/spellcheck_messages.h"
52 #include "components/data_use_measurement/core/data_use_user_data.h"
53 #include "content/public/browser/render_process_host.h"
54 #include "crypto/random.h"
55 #include "crypto/secure_hash.h"
56 #include "crypto/sha2.h"
57 #include "google_apis/google_api_keys.h"
58 #include "net/base/load_flags.h"
59 #include "net/url_request/url_fetcher.h"
60 #include "net/url_request/url_request_context_getter.h"
61
62 namespace spellcheck {
63
64 namespace {
65
// Size limit, in bytes, handed to the |feedback_| member when a
// |FeedbackSender| is constructed.
constexpr size_t kMaxFeedbackSizeBytes = 10 * 1024 * 1024;  // 10 MB

// Feedback is sent to this URL unless it is overridden on the command line.
constexpr char kFeedbackServiceURL[] = "https://www.googleapis.com/rpc";

// Lower bound, in seconds, on the interval between batches of feedback.
constexpr int kMinIntervalSeconds = 5;
73
74 // Returns a hash of |session_start|, the current timestamp, and
75 // |suggestion_index|.
76 uint32_t BuildHash(const base::Time& session_start, size_t suggestion_index) {
77 return base::Hash(
78 base::StringPrintf("%" PRId64 "%" PRId64 "%" PRIuS,
79 session_start.ToInternalValue(),
80 base::Time::Now().ToInternalValue(),
81 suggestion_index));
82 }
83
84 uint64_t BuildAnonymousHash(const FeedbackSender::RandSalt& r,
85 const base::string16& s) {
86 std::unique_ptr<crypto::SecureHash> hash(
87 crypto::SecureHash::Create(crypto::SecureHash::SHA256));
88
89 hash->Update(s.data(), s.size() * sizeof(s[0]));
90 hash->Update(&r, sizeof(r));
91
92 uint64_t result;
93 hash->Finish(&result, sizeof(result));
94 return result;
95 }
96
97 // Returns a pending feedback data structure for the spellcheck |result| and
98 // |text|.
99 Misspelling BuildFeedback(const SpellCheckResult& result,
100 const base::string16& text) {
101 size_t start = result.location;
102 base::string16 context = TrimWords(&start,
103 start + result.length,
104 text,
105 chrome::spellcheck_common::kContextWordCount);
106 return Misspelling(context,
107 start,
108 result.length,
109 std::vector<base::string16>(1, result.replacement),
110 result.hash);
111 }
112
113 // Builds suggestion info from |suggestions|.
114 std::unique_ptr<base::ListValue> BuildSuggestionInfo(
115 const std::vector<Misspelling>& misspellings,
116 bool is_first_feedback_batch,
117 const FeedbackSender::RandSalt& salt) {
118 std::unique_ptr<base::ListValue> list(new base::ListValue);
119 for (const auto& raw_misspelling : misspellings) {
120 std::unique_ptr<base::DictionaryValue> misspelling(
121 SerializeMisspelling(raw_misspelling));
122 misspelling->SetBoolean("isFirstInSession", is_first_feedback_batch);
123 misspelling->SetBoolean("isAutoCorrection", false);
124 // hash(R) fields come from red_underline_extensions.proto
125 // fixed64 user_misspelling_id = ...
126 misspelling->SetString(
127 "userMisspellingId",
128 base::Uint64ToString(BuildAnonymousHash(
129 salt, raw_misspelling.context.substr(raw_misspelling.location,
130 raw_misspelling.length))));
131 // repeated fixed64 user_suggestion_id = ...
132 std::unique_ptr<base::ListValue> suggestion_list(new base::ListValue());
133 for (const auto& suggestion : raw_misspelling.suggestions) {
134 suggestion_list->AppendString(
135 base::Uint64ToString(BuildAnonymousHash(salt, suggestion)));
136 }
137 misspelling->Set("userSuggestionId", suggestion_list.release());
138 list->Append(std::move(misspelling));
139 }
140 return list;
141 }
142
143 // Builds feedback parameters from |suggestion_info|, |language|, and |country|.
144 // Takes ownership of |suggestion_list|.
145 std::unique_ptr<base::DictionaryValue> BuildParams(
146 std::unique_ptr<base::ListValue> suggestion_info,
147 const std::string& language,
148 const std::string& country) {
149 std::unique_ptr<base::DictionaryValue> params(new base::DictionaryValue);
150 params->Set("suggestionInfo", suggestion_info.release());
151 params->SetString("key", google_apis::GetAPIKey());
152 params->SetString("language", language);
153 params->SetString("originCountry", country);
154 params->SetString("clientName", "Chrome");
155 return params;
156 }
157
158 // Builds feedback data from |params|. Takes ownership of |params|.
159 std::unique_ptr<base::Value> BuildFeedbackValue(
160 std::unique_ptr<base::DictionaryValue> params,
161 const std::string& api_version) {
162 std::unique_ptr<base::DictionaryValue> result(new base::DictionaryValue);
163 result->Set("params", params.release());
164 result->SetString("method", "spelling.feedback");
165 result->SetString("apiVersion", api_version);
166 return std::move(result);
167 }
168
// Returns true if the misspelling is non-empty and lies entirely within the
// text.
//
// |misspelling_location| is the offset of the misspelling in the text; it
// must be non-negative and smaller than |text_length|.
// |misspelling_length| is the length of the misspelling; it must be positive.
// |text_length| is the length of the text being checked.
bool IsInBounds(int misspelling_location,
                int misspelling_length,
                size_t text_length) {
  if (misspelling_location < 0 || misspelling_length <= 0)
    return false;

  const size_t location = static_cast<size_t>(misspelling_location);
  const size_t length = static_cast<size_t>(misspelling_length);
  if (location >= text_length)
    return false;

  // Compare against the room left after |location| instead of computing
  // location + length: adding the two ints can overflow, which is undefined
  // behavior and can produce a wrong answer for large inputs.
  return length <= text_length - location;
}
178
179 // Returns the feedback API version.
180 std::string GetApiVersion() {
181 // This guard is temporary.
182 // TODO(rouslan): Remove the guard. http://crbug.com/247726
183 if (base::FieldTrialList::FindFullName(kFeedbackFieldTrialName) ==
184 kFeedbackFieldTrialEnabledGroupName &&
185 base::CommandLine::ForCurrentProcess()->HasSwitch(
186 switches::kEnableSpellingFeedbackFieldTrial)) {
187 return "v2-internal";
188 }
189 return "v2";
190 }
191
192 } // namespace
193
194 FeedbackSender::FeedbackSender(net::URLRequestContextGetter* request_context,
195 const std::string& language,
196 const std::string& country)
197 : request_context_(request_context),
198 api_version_(GetApiVersion()),
199 language_(language),
200 country_(country),
201 misspelling_counter_(0),
202 feedback_(kMaxFeedbackSizeBytes),
203 session_start_(base::Time::Now()),
204 feedback_service_url_(kFeedbackServiceURL) {
205 // The command-line switch is for testing and temporary.
206 // TODO(rouslan): Remove the command-line switch when testing is complete.
207 // http://crbug.com/247726
208 if (base::CommandLine::ForCurrentProcess()->HasSwitch(
209 switches::kSpellingServiceFeedbackUrl)) {
210 feedback_service_url_ =
211 GURL(base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
212 switches::kSpellingServiceFeedbackUrl));
213 }
214 }
215
216 FeedbackSender::~FeedbackSender() {
217 }
218
219 void FeedbackSender::SelectedSuggestion(uint32_t hash, int suggestion_index) {
220 Misspelling* misspelling = feedback_.GetMisspelling(hash);
221 // GetMisspelling() returns null for flushed feedback. Feedback is flushed
222 // when the session expires every |kSessionHours| hours.
223 if (!misspelling)
224 return;
225 misspelling->action.set_type(SpellcheckAction::TYPE_SELECT);
226 misspelling->action.set_index(suggestion_index);
227 misspelling->timestamp = base::Time::Now();
228 }
229
230 void FeedbackSender::AddedToDictionary(uint32_t hash) {
231 Misspelling* misspelling = feedback_.GetMisspelling(hash);
232 // GetMisspelling() returns null for flushed feedback. Feedback is flushed
233 // when the session expires every |kSessionHours| hours.
234 if (!misspelling)
235 return;
236 misspelling->action.set_type(SpellcheckAction::TYPE_ADD_TO_DICT);
237 misspelling->timestamp = base::Time::Now();
238 const std::set<uint32_t>& hashes =
239 feedback_.FindMisspellings(GetMisspelledString(*misspelling));
240 for (uint32_t hash : hashes) {
241 Misspelling* duplicate_misspelling = feedback_.GetMisspelling(hash);
242 if (!duplicate_misspelling || duplicate_misspelling->action.IsFinal())
243 continue;
244 duplicate_misspelling->action.set_type(SpellcheckAction::TYPE_ADD_TO_DICT);
245 duplicate_misspelling->timestamp = misspelling->timestamp;
246 }
247 }
248
249 void FeedbackSender::RecordInDictionary(uint32_t hash) {
250 Misspelling* misspelling = feedback_.GetMisspelling(hash);
251 // GetMisspelling() returns null for flushed feedback. Feedback is flushed
252 // when the session expires every |kSessionHours| hours.
253 if (!misspelling)
254 return;
255 misspelling->action.set_type(SpellcheckAction::TYPE_IN_DICTIONARY);
256 }
257
258 void FeedbackSender::IgnoredSuggestions(uint32_t hash) {
259 Misspelling* misspelling = feedback_.GetMisspelling(hash);
260 // GetMisspelling() returns null for flushed feedback. Feedback is flushed
261 // when the session expires every |kSessionHours| hours.
262 if (!misspelling)
263 return;
264 misspelling->action.set_type(SpellcheckAction::TYPE_PENDING_IGNORE);
265 misspelling->timestamp = base::Time::Now();
266 }
267
268 void FeedbackSender::ManuallyCorrected(uint32_t hash,
269 const base::string16& correction) {
270 Misspelling* misspelling = feedback_.GetMisspelling(hash);
271 // GetMisspelling() returns null for flushed feedback. Feedback is flushed
272 // when the session expires every |kSessionHours| hours.
273 if (!misspelling)
274 return;
275 misspelling->action.set_type(SpellcheckAction::TYPE_MANUALLY_CORRECTED);
276 misspelling->action.set_value(correction);
277 misspelling->timestamp = base::Time::Now();
278 }
279
280 void FeedbackSender::OnReceiveDocumentMarkers(
281 int renderer_process_id,
282 const std::vector<uint32_t>& markers) {
283 if ((base::Time::Now() - session_start_).InHours() >=
284 chrome::spellcheck_common::kSessionHours) {
285 FlushFeedback();
286 return;
287 }
288
289 if (!feedback_.RendererHasMisspellings(renderer_process_id))
290 return;
291
292 feedback_.FinalizeRemovedMisspellings(renderer_process_id, markers);
293 SendFeedback(feedback_.GetMisspellingsInRenderer(renderer_process_id),
294 !renderers_sent_feedback_.count(renderer_process_id));
295 renderers_sent_feedback_.insert(renderer_process_id);
296 feedback_.EraseFinalizedMisspellings(renderer_process_id);
297 }
298
299 void FeedbackSender::OnSpellcheckResults(
300 int renderer_process_id,
301 const base::string16& text,
302 const std::vector<SpellCheckMarker>& markers,
303 std::vector<SpellCheckResult>* results) {
304 // Don't collect feedback if not going to send it.
305 if (!timer_.IsRunning())
306 return;
307
308 // Generate a map of marker offsets to marker hashes. This map helps to
309 // efficiently lookup feedback data based on the position of the misspelling
310 // in text.
311 typedef std::map<size_t, uint32_t> MarkerMap;
312 MarkerMap marker_map;
313 for (size_t i = 0; i < markers.size(); ++i)
314 marker_map[markers[i].offset] = markers[i].hash;
315
316 for (auto& result : *results) {
317 if (!IsInBounds(result.location, result.length, text.length()))
318 continue;
319 MarkerMap::const_iterator marker_it = marker_map.find(result.location);
320 if (marker_it != marker_map.end() &&
321 feedback_.HasMisspelling(marker_it->second)) {
322 // If the renderer already has a marker for this spellcheck result, then
323 // set the hash of the spellcheck result to be the same as the marker.
324 result.hash = marker_it->second;
325 } else {
326 // If the renderer does not yet have a marker for this spellcheck result,
327 // then generate a new hash for the spellcheck result.
328 result.hash = BuildHash(session_start_, ++misspelling_counter_);
329 }
330 // Save the feedback data for the spellcheck result.
331 feedback_.AddMisspelling(renderer_process_id, BuildFeedback(result, text));
332 }
333 }
334
335 void FeedbackSender::OnLanguageCountryChange(const std::string& language,
336 const std::string& country) {
337 FlushFeedback();
338 language_ = language;
339 country_ = country;
340 }
341
342 void FeedbackSender::StartFeedbackCollection() {
343 if (timer_.IsRunning())
344 return;
345
346 int interval_seconds = chrome::spellcheck_common::kFeedbackIntervalSeconds;
347 // This command-line switch is for testing and temporary.
348 // TODO(rouslan): Remove the command-line switch when testing is complete.
349 // http://crbug.com/247726
350 if (base::CommandLine::ForCurrentProcess()->HasSwitch(
351 switches::kSpellingServiceFeedbackIntervalSeconds)) {
352 base::StringToInt(
353 base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
354 switches::kSpellingServiceFeedbackIntervalSeconds),
355 &interval_seconds);
356 if (interval_seconds < kMinIntervalSeconds)
357 interval_seconds = kMinIntervalSeconds;
358 static const int kSessionSeconds =
359 chrome::spellcheck_common::kSessionHours * 60 * 60;
360 if (interval_seconds > kSessionSeconds)
361 interval_seconds = kSessionSeconds;
362 }
363 timer_.Start(FROM_HERE,
364 base::TimeDelta::FromSeconds(interval_seconds),
365 this,
366 &FeedbackSender::RequestDocumentMarkers);
367 }
368
369 void FeedbackSender::StopFeedbackCollection() {
370 if (!timer_.IsRunning())
371 return;
372
373 FlushFeedback();
374 timer_.Stop();
375 }
376
377 void FeedbackSender::RandBytes(void* p, size_t len) {
378 crypto::RandBytes(p, len);
379 }
380
381 void FeedbackSender::OnURLFetchComplete(const net::URLFetcher* source) {
382 for (ScopedVector<net::URLFetcher>::iterator sender_it = senders_.begin();
383 sender_it != senders_.end();
384 ++sender_it) {
385 if (*sender_it == source) {
386 senders_.erase(sender_it);
387 return;
388 }
389 }
390 delete source;
391 }
392
393 void FeedbackSender::RequestDocumentMarkers() {
394 // Request document markers from all the renderers that are still alive.
395 std::set<int> alive_renderers;
396 for (content::RenderProcessHost::iterator it(
397 content::RenderProcessHost::AllHostsIterator());
398 !it.IsAtEnd();
399 it.Advance()) {
400 alive_renderers.insert(it.GetCurrentValue()->GetID());
401 it.GetCurrentValue()->Send(new SpellCheckMsg_RequestDocumentMarkers());
402 }
403
404 // Asynchronously send out the feedback for all the renderers that are no
405 // longer alive.
406 std::vector<int> known_renderers = feedback_.GetRendersWithMisspellings();
407 std::sort(known_renderers.begin(), known_renderers.end());
408 std::vector<int> dead_renderers =
409 base::STLSetDifference<std::vector<int>>(known_renderers,
410 alive_renderers);
411 for (int renderer_process_id : dead_renderers) {
412 base::ThreadTaskRunnerHandle::Get()->PostTask(
413 FROM_HERE,
414 base::Bind(&FeedbackSender::OnReceiveDocumentMarkers, AsWeakPtr(),
415 renderer_process_id, std::vector<uint32_t>()));
416 }
417 }
418
419 void FeedbackSender::FlushFeedback() {
420 if (feedback_.Empty())
421 return;
422 feedback_.FinalizeAllMisspellings();
423 SendFeedback(feedback_.GetAllMisspellings(),
424 renderers_sent_feedback_.empty());
425 feedback_.Clear();
426 renderers_sent_feedback_.clear();
427 session_start_ = base::Time::Now();
428 timer_.Reset();
429 }
430
431 void FeedbackSender::SendFeedback(const std::vector<Misspelling>& feedback_data,
432 bool is_first_feedback_batch) {
433 if (base::Time::Now() - last_salt_update_ > base::TimeDelta::FromHours(24)) {
434 RandBytes(&salt_, sizeof(salt_));
435 last_salt_update_ = base::Time::Now();
436 }
437 std::unique_ptr<base::Value> feedback_value(BuildFeedbackValue(
438 BuildParams(
439 BuildSuggestionInfo(feedback_data, is_first_feedback_batch, salt_),
440 language_, country_),
441 api_version_));
442 std::string feedback;
443 base::JSONWriter::Write(*feedback_value, &feedback);
444
445 // The tests use this identifier to mock the URL fetcher.
446 static const int kUrlFetcherId = 0;
447 net::URLFetcher* sender =
448 net::URLFetcher::Create(kUrlFetcherId, feedback_service_url_,
449 net::URLFetcher::POST, this).release();
450 data_use_measurement::DataUseUserData::AttachToFetcher(
451 sender, data_use_measurement::DataUseUserData::SPELL_CHECKER);
452 sender->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES |
453 net::LOAD_DO_NOT_SAVE_COOKIES);
454 sender->SetUploadData("application/json", feedback);
455 senders_.push_back(sender);
456
457 // Request context is nullptr in testing.
458 if (request_context_.get()) {
459 sender->SetRequestContext(request_context_.get());
460 sender->Start();
461 }
462 }
463
464 } // namespace spellcheck
OLDNEW
« no previous file with comments | « chrome/browser/spellchecker/feedback_sender.h ('k') | chrome/browser/spellchecker/feedback_sender_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698