Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(24)

Side by Side Diff: chrome/browser/renderer_host/translation_service.cc

Issue 552216: This CL makes the TranslationService class send the text to be translated to ... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 10 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/renderer_host/translation_service.h" 5 #include "chrome/browser/renderer_host/translation_service.h"
6 6
7 #include "base/string_util.h" 7 #include "base/json/json_reader.h"
8 #include "chrome/browser/renderer_host/resource_message_filter.h" 8 #include "base/stl_util-inl.h"
9 #include "chrome/browser/profile.h"
9 #include "chrome/common/render_messages.h" 10 #include "chrome/common/render_messages.h"
10 11 #include "net/base/escape.h"
11 TranslationService::TranslationService(ResourceMessageFilter* filter) 12
12 : resource_message_filter_(filter) { 13 #if defined(GOOGLE_CHROME_BUILD)
14 #include "chrome/browser/renderer_host/translate/translate_internal.h"
15 #else
16 // Defining dummy URLs for unit-tests to pass.
17 #define TRANSLATE_SERVER_URL "http://disabled"
18 #define TRANSLATE_SERVER_SECURE_URL "https://disabled"
19 #endif
20
21 namespace {
22
23 // The URLs we send translation requests to.
24 const char kServiceURL[] = TRANSLATE_SERVER_URL;
25 const char kSecureServiceURL[] = TRANSLATE_SERVER_SECURE_URL;
26
27 // The different params used when sending requests to the translate server.
28 const char kVersionParam[] = "v";
29 const char kLangPairParam[] = "langpair";
30 const char kTextParam[] = "q";
31 const char kClientParam[] = "client";
32 const char kFormatParam[] = "format";
33 const char kSSLParam[] = "ssl";
34 const char kTranslationCountParam[] = "tc";
35
// Describes languages deemed equivalent from a translation point of view.
// This is used to detect unnecessary translations.
struct LocaleToCLDLanguage {
  const char* locale_language;  // Language Chrome locale is in.
  const char* cld_language;     // Language the CLD reports.
};

// Read-only lookup table. It is declared const so that it cannot be modified
// by accident at runtime; it is only ever read.
const LocaleToCLDLanguage kLocaleToCLDLanguages[] = {
  { "en-GB", "en" },
  { "en-US", "en" },
  { "es-419", "es" },
  // Review note (jungshik at Google, 2010/01/30 01:27:17): "Chrome's UI
  // languages for Brazilian Portuguese an..." (comment truncated on the
  // review page).
};
47
48 // The maximum size in bytes after which the server will refuse the request.
49 const size_t kTextRequestMaxSize = 1024 * 30;
50
51 // Delay to wait before sending a request to the translation server.
52 const int kSendRequestDelay = 100;
53
54 // Task used to send the current pending translation request for a renderer
55 // after some time has elapsed with no new request from that renderer.
56 // Note that this task is canceled when TranslationRequest is destroyed, which
57 // happens when the TranslationService is going away. So it is OK to have it
58 // have a pointer to the TranslationService.
59 class SendTranslationRequestTask : public CancelableTask {
60 public:
61 SendTranslationRequestTask(TranslationService* translation_service,
62 int renderer_id,
63 bool secure);
64 virtual void Run();
65 virtual void Cancel();
66
67 private:
68 TranslationService* translation_service_;
69 int renderer_id_;
70 bool secure_;
71 bool canceled_;
72
73 DISALLOW_COPY_AND_ASSIGN(SendTranslationRequestTask);
74 };
75
76 } // namespace
77
78 // Contains the information necessary to send a request to the translation
79 // server. It is used to group several renderer queries, as to limit the
80 // load sent to the translation server.
81 struct TranslationService::TranslationRequest {
82 TranslationRequest(int routing_id,
83 int page_id,
84 const std::string& source_lang,
85 const std::string& target_lang,
86 bool secure)
87 : routing_id(routing_id),
88 page_id(page_id),
89 source_lang(source_lang),
90 target_lang(target_lang),
91 secure(secure),
92 send_query_task(NULL) {
93 renderer_request_info.reset(new RendererRequestInfoList());
94 }
95
96 ~TranslationRequest() {
97 if (send_query_task)
98 send_query_task->Cancel();
99 }
100
101 void Clear() {
102 page_id = 0;
103 source_lang.clear();
104 target_lang.clear();
105 query.clear();
106 renderer_request_info->clear();
107 if (send_query_task) {
108 send_query_task->Cancel();
109 send_query_task = NULL;
110 }
111 }
112
113 int routing_id;
114 int page_id;
115 std::string source_lang;
116 std::string target_lang;
117 bool secure;
118 std::string query;
119 // renderer_request_info is a scoped_ptr so that we avoid copying the list
120 // when the request is sent. At that point we only transfer ownership of that
121 // list to renderer_request_infos_.
122 scoped_ptr<RendererRequestInfoList> renderer_request_info;
123 CancelableTask* send_query_task;
124 };
125
126 ////////////////////////////////////////////////////////////////////////////////
127 // SendTranslationRequestTask
128
129 SendTranslationRequestTask::SendTranslationRequestTask(
130 TranslationService* translation_service,
131 int renderer_id,
132 bool secure)
133 : translation_service_(translation_service),
134 renderer_id_(renderer_id),
135 secure_(secure),
136 canceled_(false) {
137 }
138
139 void SendTranslationRequestTask::Run() {
140 if (canceled_)
141 return;
142 translation_service_->
143 SendTranslationRequestForRenderer(renderer_id_, secure_);
144 }
145
146 void SendTranslationRequestTask::Cancel() {
147 canceled_ = true;
148 }
149
150 ////////////////////////////////////////////////////////////////////////////////
151 // TranslationService, public:
152
153 TranslationService::TranslationService(IPC::Message::Sender* message_sender)
154 : message_sender_(message_sender) {
155 }
156
157 TranslationService::~TranslationService() {
158 STLDeleteContainerPairSecondPointers(pending_translation_requests_.begin(),
159 pending_translation_requests_.end());
160 STLDeleteContainerPairSecondPointers(
161 pending_secure_translation_requests_.begin(),
162 pending_secure_translation_requests_.end());
163 STLDeleteContainerPairPointers(renderer_request_infos_.begin(),
164 renderer_request_infos_.end());
13 } 165 }
14 166
15 void TranslationService::Translate(int routing_id, 167 void TranslationService::Translate(int routing_id,
168 int page_id,
16 int work_id, 169 int work_id,
17 const std::vector<string16>& text_chunks, 170 const TextChunks& text_chunks,
18 std::string from_language, 171 const std::string& source_lang,
19 std::string to_language, 172 const std::string& target_lang,
20 bool secure) { 173 bool secure) {
21 std::vector<string16> translated_text; 174 TranslationRequestMap& request_map =
22 for (std::vector<string16>::const_iterator iter = text_chunks.begin(); 175 secure ? pending_secure_translation_requests_ :
23 iter != text_chunks.end(); ++iter) { 176 pending_translation_requests_;
24 translated_text.push_back(StringToUpperASCII(*iter)); 177 TranslationRequestMap::iterator iter = request_map.find(routing_id);
25 } 178 TranslationRequest* translation_request = NULL;
26 resource_message_filter_->Send( 179
27 new ViewMsg_TranslateTextReponse(routing_id, work_id, 180 string16 utf16_text = MergeTextChunks(text_chunks);
28 0, translated_text)); 181 std::string text = EscapeUrlEncodedData(UTF16ToUTF8(utf16_text));
29 } 182
183 if (iter != request_map.end()) {
184 translation_request = iter->second;
185 if (page_id != translation_request->page_id) {
186 // We are getting a request from a renderer for a different page id.
187 // This indicates we navigated away from the page that was being
188 // translated. We should drop the current pending translations.
189 translation_request->Clear();
190 // Set the new states.
191 translation_request->page_id = page_id;
192 translation_request->source_lang = source_lang;
193 translation_request->target_lang = target_lang;
194 } else {
195 DCHECK(translation_request->source_lang == source_lang);
196 DCHECK(translation_request->target_lang == target_lang);
197 // Cancel the pending tasks to send the query. We'll be posting a new one
198 // after we updated the request.
199 translation_request->send_query_task->Cancel();
200 translation_request->send_query_task = NULL;
201 if (translation_request->query.size() + text.size() >=
202 kTextRequestMaxSize) {
203 // The request would be too big with that last addition of text, send
204 // the request now. (Single requests too big to be sent in 1 translation
205 // request are dealt with below.)
206 if (!translation_request->query.empty()) { // Single requests
207 SendRequestToTranslationServer(translation_request);
208 // The translation request has been deleted.
209 translation_request = NULL;
210 iter = request_map.end();
211 }
212 }
213 }
214 }
215
216 if (translation_request == NULL) {
217 translation_request = new TranslationRequest(routing_id, page_id,
218 source_lang, target_lang,
219 secure);
220 request_map[routing_id] = translation_request;
221 }
222
223 AddTextToRequestString(&(translation_request->query), text,
224 source_lang, target_lang, secure);
225
226 translation_request->renderer_request_info->push_back(
227 RendererRequestInfo(routing_id, work_id));
228
229 if (translation_request->query.size() > kTextRequestMaxSize) {
230 DCHECK(translation_request->renderer_request_info->size() == 1U);
231 // This one request is too large for the translation service.
232 // TODO(jcampan): we should support such requests by splitting them.
233 iter = request_map.find(routing_id);
234 DCHECK(iter != request_map.end());
235 request_map.erase(iter);
236 message_sender_->Send(
237 new ViewMsg_TranslateTextReponse(routing_id, work_id, 1, TextChunks()));
238 delete translation_request;
239 return;
240 }
241
242 // Now post the new task that will ensure we'll send the request to the
243 // translation server if no renderer requests are received within a
244 // reasonable amount of time.
245 DCHECK(!translation_request->send_query_task);
246 translation_request->send_query_task =
247 new SendTranslationRequestTask(this, routing_id, secure);
248 MessageLoop::current()->PostDelayedTask(FROM_HERE,
249 translation_request->send_query_task, GetSendRequestDelay());
250 }
251
252 void TranslationService::SendTranslationRequestForRenderer(int renderer_id,
253 bool secure) {
254 TranslationRequestMap& request_map =
255 secure ? pending_secure_translation_requests_ :
256 pending_translation_requests_;
257 TranslationRequestMap::const_iterator iter = request_map.find(renderer_id);
258 DCHECK(iter != request_map.end());
259 SendRequestToTranslationServer(iter->second);
260 }
261
262 void TranslationService::OnURLFetchComplete(const URLFetcher* source,
263 const GURL& url,
264 const URLRequestStatus& status,
265 int response_code,
266 const ResponseCookies& cookies,
267 const std::string& data) {
268 if (!status.is_success() || response_code != 200 || data.empty()) {
269 TranslationFailed(source);
270 return;
271 }
272
273 // If the response is a simple string, put it in an array. (The JSONReader
274 // requires an array or map at the root.)
275 std::string str;
jungshik at Google 2010/01/30 01:27:17 nit: wrapped_data?
276 if (data.size() > 1U && data[0] == '"') {
277 str.append("[");
278 str.append(data);
279 str.append("]");
280 }
281 scoped_ptr<Value> value(base::JSONReader::Read(str.empty() ? data : str,
282 true));
283 if (!value.get()) {
284 NOTREACHED() << "Translation server returned invalid JSON response.";
285 TranslationFailed(source);
286 return;
287 }
288
289 // If the request was for a single string, the response is the translated
290 // string.
291 TextChunksList translated_chunks_list;
292 if (value->IsType(Value::TYPE_STRING)) {
293 string16 str16;
jungshik at Google 2010/01/30 01:27:17 nit: translated_text?
294 if (!value->GetAsUTF16(&str16)) {
295 NOTREACHED();
296 TranslationFailed(source);
297 return;
298 }
299 TextChunks text_chunks;
jungshik at Google 2010/01/30 01:27:17 nit: translated_chunks might be a better name.
300 text_chunks.push_back(str16);
301 translated_chunks_list.push_back(text_chunks);
302 } else {
303 if (!value->IsType(Value::TYPE_LIST)) {
304 NOTREACHED() << "Translation server returned unexpected JSON response "
305 " (not a list).";
306 TranslationFailed(source);
307 return;
308 }
309 ListValue* list = static_cast<ListValue*>(value.get());
jungshik at Google 2010/01/30 01:27:17 nit: translated_text_list?
310 for (size_t i = 0; i < list->GetSize(); ++i) {
311 string16 translated_text;
312 if (!list->GetStringAsUTF16(i, &translated_text)) {
313 NOTREACHED() << "Translation server returned unexpected JSON response "
314 " (unexpected type in list).";
315 TranslationFailed(source);
316 return;
317 }
318 translated_text = UnescapeForHTML(translated_text);
319 TranslationService::TextChunks text_chunks;
jungshik at Google 2010/01/30 01:27:17 nit: translated_chunks might be a better name.
320 TranslationService::SplitTextChunks(translated_text, &text_chunks);
321 translated_chunks_list.push_back(text_chunks);
322 }
323 }
324
325 // We have successfully extracted all the translated text chunks, send them to
326 // the renderer.
327 SendResponseToRenderer(source, 0, translated_chunks_list);
328 }
329
330 // static
331 bool TranslationService::ShouldTranslatePage(
332 const std::string& page_language, const std::string& chrome_language) {
333 // Most locale names are the actual ISO 639 codes that the Google translate
334 // API uses, but for the ones longer than 2 chars.
jungshik at Google 2010/01/30 01:27:17 Hmm, the input parameters for this function are th
335 // See l10n_util.cc for the list.
336 for (size_t i = 0; i < arraysize(kLocaleToCLDLanguages); ++i) {
337 if (chrome_language == kLocaleToCLDLanguages[i].locale_language &&
338 page_language == kLocaleToCLDLanguages[i].cld_language) {
339 return false;
340 }
341 }
342 return true;
343 }
344
345 // static
346 bool TranslationService::IsTranslationEnabled() {
347 return GURL(kServiceURL).host() != "disabled";
348 }
349
350 ////////////////////////////////////////////////////////////////////////////////
351 // TranslationService, protected:
352
353 int TranslationService::GetSendRequestDelay() const {
354 return kSendRequestDelay;
355 }
356
357 ////////////////////////////////////////////////////////////////////////////////
358 // TranslationService, private:
359
360 void TranslationService::SendRequestToTranslationServer(
361 TranslationRequest* request) {
362 DCHECK(!request->query.empty());
363 GURL url(request->secure ? kSecureServiceURL : kServiceURL);
364 URLFetcher* url_fetcher =
365 URLFetcher::Create(request->routing_id /* used in tests */,
366 url, URLFetcher::POST, this);
367 url_fetcher->set_upload_data("application/x-www-form-urlencoded",
368 request->query);
369 url_fetcher->set_request_context(Profile::GetDefaultRequestContext());
370 url_fetcher->Start();
371
372 // renderer_request_infos_ will now own the RendererRequestInfoList.
373 renderer_request_infos_[url_fetcher] =
374 request->renderer_request_info.release();
375
376 // Remove the request from the translation request map.
377 TranslationRequestMap& translation_request_map =
378 request->secure ? pending_secure_translation_requests_ :
379 pending_translation_requests_;
380 TranslationRequestMap::iterator iter =
381 translation_request_map.find(request->routing_id);
382 DCHECK(iter != translation_request_map.end());
383 translation_request_map.erase(iter);
384 delete request;
385 }
386
387 void TranslationService::SendResponseToRenderer(
388 const URLFetcher* const_url_fetcher, int error_code,
389 const TextChunksList& text_chunks_list) {
390 scoped_ptr<const URLFetcher> url_fetcher(const_url_fetcher);
391 RendererRequestInfoMap::iterator iter =
392 renderer_request_infos_.find(url_fetcher.get());
393 DCHECK(iter != renderer_request_infos_.end());
394 scoped_ptr<RendererRequestInfoList> request_info_list(iter->second);
395 DCHECK(error_code != 0 ||
396 request_info_list->size() == text_chunks_list.size());
397 for (size_t i = 0; i < request_info_list->size(); ++i) {
398 RendererRequestInfo& request_info = request_info_list->at(i);
399 message_sender_->Send(
400 new ViewMsg_TranslateTextReponse(request_info.routing_id,
401 request_info.work_id,
402 error_code,
403 error_code ? TextChunks() :
404 text_chunks_list[i]));
405 }
406 renderer_request_infos_.erase(iter);
407 }
408
409 void TranslationService::TranslationFailed(const URLFetcher* url_fetcher) {
410 SendResponseToRenderer(url_fetcher, 1, TranslationService::TextChunksList());
411 }
412
413 // static
414 string16 TranslationService::MergeTextChunks(const TextChunks& text_chunks) {
415 // If there is only 1 chunk, we don't need an anchor tag as there is no order
416 // to preserve.
417 if (text_chunks.size() == 1U)
418 return text_chunks[0];
419
420 string16 str;
421 for (size_t i = 0; i < text_chunks.size(); ++i) {
422 str.append(ASCIIToUTF16("<a _CR_TR_ id='"));
423 str.append(IntToString16(i));
424 str.append(ASCIIToUTF16("'>"));
425 str.append(text_chunks[i]);
426 str.append(ASCIIToUTF16("</a>"));
427 }
428 return str;
429 }
430
431 // static
432 void TranslationService::SplitTextChunks(const string16& translated_text,
jungshik at Google 2010/01/30 01:27:17 nit: SplitIntoTextChunks seems better.
433 TextChunks* text_chunks) {
434 const string16 kOpenTag = ASCIIToUTF16("<a _CR_TR_ ");
435 const string16 kCloseTag = ASCIIToUTF16("</a>");
436 const size_t open_tag_len = kOpenTag.size();
437
438 size_t start_index = translated_text.find(kOpenTag);
439 if (start_index == std::string::npos) {
440 // No magic anchor tag, it was a single chunk.
441 text_chunks->push_back(translated_text);
442 return;
443 }
444
445 // The server might send us some HTML with duplicated and unbalanced tags.
446 // We separate from the open tag to the next open tag located after at least
447 // one close tag.
448 while (start_index != std::string::npos) {
449 size_t stop_index =
450 translated_text.find(kCloseTag, start_index + open_tag_len);
451 string16 chunk;
452 if (stop_index == std::string::npos) {
453 // No close tag. Just report as one chunk.
454 chunk = translated_text;
455 start_index = std::string::npos; // So we break on next iteration.
456 } else {
457 // Now find the next open tag after this close tag.
458 stop_index = translated_text.find(kOpenTag, stop_index);
459 if (stop_index != std::string::npos) {
460 chunk = translated_text.substr(start_index, stop_index - start_index);
461 start_index = stop_index;
462 } else {
463 chunk = translated_text.substr(start_index);
464 start_index = std::string::npos; // So we break on next iteration.
465 }
466 }
467 chunk = RemoveTag(chunk);
468 // The translation server leaves some ampersand character in the
469 // translation.
470 chunk = UnescapeForHTML(chunk);
471 text_chunks->push_back(RemoveTag(chunk));
472 }
473 }
474
475 // static
476 string16 TranslationService::RemoveTag(const string16& text) {
477 // Remove any anchor tags, knowing they could be extra/unbalanced tags.
478 const string16 kStartTag(ASCIIToUTF16("<a "));
479 const string16 kEndTag(ASCIIToUTF16("</a>"));
480 const string16 kGreaterThan(ASCIIToUTF16(">"));
481 const string16 kLessThan(ASCIIToUTF16("<"));
482
483 string16 result;
484 size_t start_index = text.find(kStartTag);
485 if (start_index == std::string::npos) {
486 result = text;
487 } else {
488 bool first_iter = true;
489 while (true) {
490 size_t stop_index = text.find(kGreaterThan, start_index);
491 size_t next_tag_index = text.find(kLessThan, start_index + 1);
492 // Ignore unclosed <a tag. (Ignore subsequent closing tags, they'll be
493 // removed in the next loop.)
494 if (stop_index == std::string::npos ||
495 (next_tag_index != std::string::npos &&
496 stop_index > next_tag_index)) {
497 result.append(text.substr(start_index));
498 break;
499 }
500 if (start_index > 0 && first_iter)
501 result = text.substr(0, start_index);
502 start_index = text.find(kStartTag, start_index + 1);
503 if (start_index == std::string::npos) {
504 result += text.substr(stop_index + 1);
505 break;
506 }
507 result += text.substr(stop_index + 1, start_index - stop_index - 1);
508 first_iter = false;
509 }
510 }
511
512 // Now remove </a> tags.
513 ReplaceSubstringsAfterOffset(&result, 0,
514 ASCIIToUTF16("</a>"), ASCIIToUTF16(""));
515 return result;
516 }
517
518 // static
519 void TranslationService::AddTextToRequestString(std::string* request,
520 const std::string& text,
521 const std::string& source_lang,
522 const std::string& target_lang,
523 bool secure) {
524 if (request->empty()) {
525 // First request, add required parameters.
526 request->append(kVersionParam);
527 request->append("=1.0&");
528 request->append(kClientParam);
529 request->append("=cr&"); // cr = Chrome.
530 request->append(kFormatParam);
531 request->append("=html&");
532 request->append(kLangPairParam);
533 request->append("=");
534 request->append(source_lang);
535 request->append("%7C"); // | URL encoded.
536 request->append(target_lang);
537 if (secure) {
538 request->append("&");
539 request->append(kSSLParam);
540 request->append("=1");
541 }
542 }
543 request->append("&");
544 request->append(kTextParam);
545 request->append("=");
546 request->append(text);
547 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698