Index: chrome/browser/renderer_host/translation_service.cc |
=================================================================== |
--- chrome/browser/renderer_host/translation_service.cc (revision 37411) |
+++ chrome/browser/renderer_host/translation_service.cc (working copy) |
@@ -4,26 +4,544 @@ |
#include "chrome/browser/renderer_host/translation_service.h" |
-#include "base/string_util.h" |
-#include "chrome/browser/renderer_host/resource_message_filter.h" |
+#include "base/json/json_reader.h" |
+#include "base/stl_util-inl.h" |
+#include "chrome/browser/profile.h" |
#include "chrome/common/render_messages.h" |
+#include "net/base/escape.h" |
-TranslationService::TranslationService(ResourceMessageFilter* filter) |
- : resource_message_filter_(filter) { |
+#if defined(GOOGLE_CHROME_BUILD) |
+#include "chrome/browser/renderer_host/translate/translate_internal.h" |
+#else |
+// Defining dummy URLs for unit-tests to pass. |
+#define TRANSLATE_SERVER_URL "http://disabled" |
+#define TRANSLATE_SERVER_SECURE_URL "https://disabled" |
+#endif |
+ |
+namespace { |
+ |
+// The URLs we send translation requests to. |
+const char kServiceURL[] = TRANSLATE_SERVER_URL; |
+const char kSecureServiceURL[] = TRANSLATE_SERVER_SECURE_URL; |
+ |
+// The different params used when sending requests to the translate server. |
+const char kVersionParam[] = "v"; |
+const char kLangPairParam[] = "langpair"; |
+const char kTextParam[] = "q"; |
+const char kClientParam[] = "client"; |
+const char kFormatParam[] = "format"; |
+const char kSSLParam[] = "ssl"; |
+const char kTranslationCountParam[] = "tc"; |
+ |
+// Describes languages deemed equivalent from a translation point of view. |
+// This is used to detect unnecessary translations. |
+struct LocaleToCLDLanguage { |
+ const char* locale_language; // Language Chrome locale is in. |
+ const char* cld_language; // Language the CLD reports. |
+}; |
+LocaleToCLDLanguage kLocaleToCLDLanguages[] = { |
+ { "en-GB", "en" }, |
+ { "en-US", "en" }, |
+ { "es-419", "es" }, |
jungshik at Google
2010/01/30 01:27:17
Chrome's UI languages for Brazillian Portuguese an
|
+}; |
+ |
+// The maximum size in bytes after which the server will refuse the request. |
+const size_t kTextRequestMaxSize = 1024 * 30; |
+ |
+// Delay to wait for before sending a request to the translation server. |
+const int kSendRequestDelay = 100; |
+ |
+// Task used to send the current pending translation request for a renderer |
+// after some time has elapsed with no new request from that renderer. |
+// Note that this task is canceled when TranslationRequest is destroyed, which |
+// happens when the TranslationService is going away. So it is OK to have it |
+// have a pointer to the TranslationService. |
+class SendTranslationRequestTask : public CancelableTask { |
+ public: |
+ SendTranslationRequestTask(TranslationService* translation_service, |
+ int renderer_id, |
+ bool secure); |
+ virtual void Run(); |
+ virtual void Cancel(); |
+ |
+ private: |
+ TranslationService* translation_service_; |
+ int renderer_id_; |
+ bool secure_; |
+ bool canceled_; |
+ |
+ DISALLOW_COPY_AND_ASSIGN(SendTranslationRequestTask); |
+}; |
+ |
+} // namespace |
+ |
+// Contains the information necessary to send a request to the translation |
+// server. It is used to group several renderer queries, as to limit the |
+// load sent to the translation server. |
+struct TranslationService::TranslationRequest { |
+ TranslationRequest(int routing_id, |
+ int page_id, |
+ const std::string& source_lang, |
+ const std::string& target_lang, |
+ bool secure) |
+ : routing_id(routing_id), |
+ page_id(page_id), |
+ source_lang(source_lang), |
+ target_lang(target_lang), |
+ secure(secure), |
+ send_query_task(NULL) { |
+ renderer_request_info.reset(new RendererRequestInfoList()); |
+ } |
+ |
+ ~TranslationRequest() { |
+ if (send_query_task) |
+ send_query_task->Cancel(); |
+ } |
+ |
+ void Clear() { |
+ page_id = 0; |
+ source_lang.clear(); |
+ target_lang.clear(); |
+ query.clear(); |
+ renderer_request_info->clear(); |
+ if (send_query_task) { |
+ send_query_task->Cancel(); |
+ send_query_task = NULL; |
+ } |
+ } |
+ |
+ int routing_id; |
+ int page_id; |
+ std::string source_lang; |
+ std::string target_lang; |
+ bool secure; |
+ std::string query; |
+ // renderer_request_info is a scoped_ptr so that we avoid copying the list |
+ // when the request is sent. At that point we only transfer ownership of that |
+ // list to renderer_request_infos_. |
+ scoped_ptr<RendererRequestInfoList> renderer_request_info; |
+ CancelableTask* send_query_task; |
+}; |
+ |
+//////////////////////////////////////////////////////////////////////////////// |
+// SendTranslationRequestTask |
+ |
+SendTranslationRequestTask::SendTranslationRequestTask( |
+ TranslationService* translation_service, |
+ int renderer_id, |
+ bool secure) |
+ : translation_service_(translation_service), |
+ renderer_id_(renderer_id), |
+ secure_(secure), |
+ canceled_(false) { |
} |
+void SendTranslationRequestTask::Run() { |
+ if (canceled_) |
+ return; |
+ translation_service_-> |
+ SendTranslationRequestForRenderer(renderer_id_, secure_); |
+} |
+ |
+void SendTranslationRequestTask::Cancel() { |
+ canceled_ = true; |
+} |
+ |
+//////////////////////////////////////////////////////////////////////////////// |
+// TranslationService, public: |
+ |
+TranslationService::TranslationService(IPC::Message::Sender* message_sender) |
+ : message_sender_(message_sender) { |
+} |
+ |
+TranslationService::~TranslationService() { |
+ STLDeleteContainerPairSecondPointers(pending_translation_requests_.begin(), |
+ pending_translation_requests_.end()); |
+ STLDeleteContainerPairSecondPointers( |
+ pending_secure_translation_requests_.begin(), |
+ pending_secure_translation_requests_.end()); |
+ STLDeleteContainerPairPointers(renderer_request_infos_.begin(), |
+ renderer_request_infos_.end()); |
+} |
+ |
void TranslationService::Translate(int routing_id, |
+ int page_id, |
int work_id, |
- const std::vector<string16>& text_chunks, |
- std::string from_language, |
- std::string to_language, |
+ const TextChunks& text_chunks, |
+ const std::string& source_lang, |
+ const std::string& target_lang, |
bool secure) { |
- std::vector<string16> translated_text; |
- for (std::vector<string16>::const_iterator iter = text_chunks.begin(); |
- iter != text_chunks.end(); ++iter) { |
- translated_text.push_back(StringToUpperASCII(*iter)); |
+ TranslationRequestMap& request_map = |
+ secure ? pending_secure_translation_requests_ : |
+ pending_translation_requests_; |
+ TranslationRequestMap::iterator iter = request_map.find(routing_id); |
+ TranslationRequest* translation_request = NULL; |
+ |
+ string16 utf16_text = MergeTextChunks(text_chunks); |
+ std::string text = EscapeUrlEncodedData(UTF16ToUTF8(utf16_text)); |
+ |
+ if (iter != request_map.end()) { |
+ translation_request = iter->second; |
+ if (page_id != translation_request->page_id) { |
+ // We are getting a request from a renderer for a different page id. |
+ // This indicates we navigated away from the page that was being |
+ // translated. We should drop the current pending translations. |
+ translation_request->Clear(); |
+ // Set the new states. |
+ translation_request->page_id = page_id; |
+ translation_request->source_lang = source_lang; |
+ translation_request->target_lang = target_lang; |
+ } else { |
+ DCHECK(translation_request->source_lang == source_lang); |
+ DCHECK(translation_request->target_lang == target_lang); |
+ // Cancel the pending tasks to send the query. We'll be posting a new one |
+ // after we updated the request. |
+ translation_request->send_query_task->Cancel(); |
+ translation_request->send_query_task = NULL; |
+ if (translation_request->query.size() + text.size() >= |
+ kTextRequestMaxSize) { |
+ // The request would be too big with that last addition of text, send |
+ // the request now. (Single requests too big to be sent in 1 translation |
+ // request are dealt with below.) |
+ if (!translation_request->query.empty()) { // Single requests |
+ SendRequestToTranslationServer(translation_request); |
+ // The translation request has been deleted. |
+ translation_request = NULL; |
+ iter = request_map.end(); |
+ } |
+ } |
+ } |
} |
- resource_message_filter_->Send( |
- new ViewMsg_TranslateTextReponse(routing_id, work_id, |
- 0, translated_text)); |
+ |
+ if (translation_request == NULL) { |
+ translation_request = new TranslationRequest(routing_id, page_id, |
+ source_lang, target_lang, |
+ secure); |
+ request_map[routing_id] = translation_request; |
+ } |
+ |
+ AddTextToRequestString(&(translation_request->query), text, |
+ source_lang, target_lang, secure); |
+ |
+ translation_request->renderer_request_info->push_back( |
+ RendererRequestInfo(routing_id, work_id)); |
+ |
+ if (translation_request->query.size() > kTextRequestMaxSize) { |
+ DCHECK(translation_request->renderer_request_info->size() == 1U); |
+ // This one request is too large for the translation service. |
+ // TODO(jcampan): we should support such requests by splitting them. |
+ iter = request_map.find(routing_id); |
+ DCHECK(iter != request_map.end()); |
+ request_map.erase(iter); |
+ message_sender_->Send( |
+ new ViewMsg_TranslateTextReponse(routing_id, work_id, 1, TextChunks())); |
+ delete translation_request; |
+ return; |
+ } |
+ |
+ // Now post the new task that will ensure we'll send the request to the |
+ // translation server if no renderer requests are received within a |
+ // reasonable amount of time. |
+ DCHECK(!translation_request->send_query_task); |
+ translation_request->send_query_task = |
+ new SendTranslationRequestTask(this, routing_id, secure); |
+ MessageLoop::current()->PostDelayedTask(FROM_HERE, |
+ translation_request->send_query_task, GetSendRequestDelay()); |
} |
+ |
+void TranslationService::SendTranslationRequestForRenderer(int renderer_id, |
+ bool secure) { |
+ TranslationRequestMap& request_map = |
+ secure ? pending_secure_translation_requests_ : |
+ pending_translation_requests_; |
+ TranslationRequestMap::const_iterator iter = request_map.find(renderer_id); |
+ DCHECK(iter != request_map.end()); |
+ SendRequestToTranslationServer(iter->second); |
+} |
+ |
+void TranslationService::OnURLFetchComplete(const URLFetcher* source, |
+ const GURL& url, |
+ const URLRequestStatus& status, |
+ int response_code, |
+ const ResponseCookies& cookies, |
+ const std::string& data) { |
+ if (!status.is_success() || response_code != 200 || data.empty()) { |
+ TranslationFailed(source); |
+ return; |
+ } |
+ |
+ // If the response is a simple string, put it in an array. (The JSONReader |
+ // requires an array or map at the root.) |
+ std::string str; |
jungshik at Google
2010/01/30 01:27:17
nit: wrapped_data?
|
+ if (data.size() > 1U && data[0] == '"') { |
+ str.append("["); |
+ str.append(data); |
+ str.append("]"); |
+ } |
+ scoped_ptr<Value> value(base::JSONReader::Read(str.empty() ? data : str, |
+ true)); |
+ if (!value.get()) { |
+ NOTREACHED() << "Translation server returned invalid JSON response."; |
+ TranslationFailed(source); |
+ return; |
+ } |
+ |
+ // If the request was for a single string, the response is the translated |
+ // string. |
+ TextChunksList translated_chunks_list; |
+ if (value->IsType(Value::TYPE_STRING)) { |
+ string16 str16; |
jungshik at Google
2010/01/30 01:27:17
nit: translated_text?
|
+ if (!value->GetAsUTF16(&str16)) { |
+ NOTREACHED(); |
+ TranslationFailed(source); |
+ return; |
+ } |
+ TextChunks text_chunks; |
jungshik at Google
2010/01/30 01:27:17
nit: translated_chunks might be a better name.
|
+ text_chunks.push_back(str16); |
+ translated_chunks_list.push_back(text_chunks); |
+ } else { |
+ if (!value->IsType(Value::TYPE_LIST)) { |
+ NOTREACHED() << "Translation server returned unexpected JSON response " |
+ " (not a list)."; |
+ TranslationFailed(source); |
+ return; |
+ } |
+ ListValue* list = static_cast<ListValue*>(value.get()); |
jungshik at Google
2010/01/30 01:27:17
nit: translated_text_list?
|
+ for (size_t i = 0; i < list->GetSize(); ++i) { |
+ string16 translated_text; |
+ if (!list->GetStringAsUTF16(i, &translated_text)) { |
+ NOTREACHED() << "Translation server returned unexpected JSON response " |
+ " (unexpected type in list)."; |
+ TranslationFailed(source); |
+ return; |
+ } |
+ translated_text = UnescapeForHTML(translated_text); |
+ TranslationService::TextChunks text_chunks; |
jungshik at Google
2010/01/30 01:27:17
nit: translated_chunks might be a better name.
|
+ TranslationService::SplitTextChunks(translated_text, &text_chunks); |
+ translated_chunks_list.push_back(text_chunks); |
+ } |
+ } |
+ |
+ // We have successfully extracted all the translated text chunks, send them to |
+ // the renderer. |
+ SendResponseToRenderer(source, 0, translated_chunks_list); |
+} |
+ |
+// static |
+bool TranslationService::ShouldTranslatePage( |
+ const std::string& page_language, const std::string& chrome_language) { |
+ // Most locale names are the actual ISO 639 codes that the Google translate |
+ // API uses, but for the ones longer than 2 chars. |
jungshik at Google
2010/01/30 01:27:17
Hmm, the input parameters for this function are th
|
+ // See l10n_util.cc for the list. |
+ for (size_t i = 0; i < arraysize(kLocaleToCLDLanguages); ++i) { |
+ if (chrome_language == kLocaleToCLDLanguages[i].locale_language && |
+ page_language == kLocaleToCLDLanguages[i].cld_language) { |
+ return false; |
+ } |
+ } |
+ return true; |
+} |
+ |
+// static |
+bool TranslationService::IsTranslationEnabled() { |
+ return GURL(kServiceURL).host() != "disabled"; |
+} |
+ |
+//////////////////////////////////////////////////////////////////////////////// |
+// TranslationService, protected: |
+ |
+int TranslationService::GetSendRequestDelay() const { |
+ return kSendRequestDelay; |
+} |
+ |
+//////////////////////////////////////////////////////////////////////////////// |
+// TranslationService, private: |
+ |
+void TranslationService::SendRequestToTranslationServer( |
+ TranslationRequest* request) { |
+ DCHECK(!request->query.empty()); |
+ GURL url(request->secure ? kSecureServiceURL : kServiceURL); |
+ URLFetcher* url_fetcher = |
+ URLFetcher::Create(request->routing_id /* used in tests */, |
+ url, URLFetcher::POST, this); |
+ url_fetcher->set_upload_data("application/x-www-form-urlencoded", |
+ request->query); |
+ url_fetcher->set_request_context(Profile::GetDefaultRequestContext()); |
+ url_fetcher->Start(); |
+ |
+ // renderer_request_infos_ will now own the RendererRequestInfoList. |
+ renderer_request_infos_[url_fetcher] = |
+ request->renderer_request_info.release(); |
+ |
+ // Remove the request from the translation request map. |
+ TranslationRequestMap& translation_request_map = |
+ request->secure ? pending_secure_translation_requests_ : |
+ pending_translation_requests_; |
+ TranslationRequestMap::iterator iter = |
+ translation_request_map.find(request->routing_id); |
+ DCHECK(iter != translation_request_map.end()); |
+ translation_request_map.erase(iter); |
+ delete request; |
+} |
+ |
+void TranslationService::SendResponseToRenderer( |
+ const URLFetcher* const_url_fetcher, int error_code, |
+ const TextChunksList& text_chunks_list) { |
+ scoped_ptr<const URLFetcher> url_fetcher(const_url_fetcher); |
+ RendererRequestInfoMap::iterator iter = |
+ renderer_request_infos_.find(url_fetcher.get()); |
+ DCHECK(iter != renderer_request_infos_.end()); |
+ scoped_ptr<RendererRequestInfoList> request_info_list(iter->second); |
+ DCHECK(error_code != 0 || |
+ request_info_list->size() == text_chunks_list.size()); |
+ for (size_t i = 0; i < request_info_list->size(); ++i) { |
+ RendererRequestInfo& request_info = request_info_list->at(i); |
+ message_sender_->Send( |
+ new ViewMsg_TranslateTextReponse(request_info.routing_id, |
+ request_info.work_id, |
+ error_code, |
+ error_code ? TextChunks() : |
+ text_chunks_list[i])); |
+ } |
+ renderer_request_infos_.erase(iter); |
+} |
+ |
+void TranslationService::TranslationFailed(const URLFetcher* url_fetcher) { |
+ SendResponseToRenderer(url_fetcher, 1, TranslationService::TextChunksList()); |
+} |
+ |
+// static |
+string16 TranslationService::MergeTextChunks(const TextChunks& text_chunks) { |
+ // If there is only 1 chunk, we don't need an anchor tag as there is no order |
+ // to preserve. |
+ if (text_chunks.size() == 1U) |
+ return text_chunks[0]; |
+ |
+ string16 str; |
+ for (size_t i = 0; i < text_chunks.size(); ++i) { |
+ str.append(ASCIIToUTF16("<a _CR_TR_ id='")); |
+ str.append(IntToString16(i)); |
+ str.append(ASCIIToUTF16("'>")); |
+ str.append(text_chunks[i]); |
+ str.append(ASCIIToUTF16("</a>")); |
+ } |
+ return str; |
+} |
+ |
+// static |
+void TranslationService::SplitTextChunks(const string16& translated_text, |
jungshik at Google
2010/01/30 01:27:17
nit: SplitIntoTextChunks seems better.
|
+ TextChunks* text_chunks) { |
+ const string16 kOpenTag = ASCIIToUTF16("<a _CR_TR_ "); |
+ const string16 kCloseTag = ASCIIToUTF16("</a>"); |
+ const size_t open_tag_len = kOpenTag.size(); |
+ |
+ size_t start_index = translated_text.find(kOpenTag); |
+ if (start_index == std::string::npos) { |
+ // No magic anchor tag, it was a single chunk. |
+ text_chunks->push_back(translated_text); |
+ return; |
+ } |
+ |
+ // The server might send us some HTML with duplicated and unbalanced tags. |
+ // We separate from the open tag to the next open tag located after at least |
+ // one close tag. |
+ while (start_index != std::string::npos) { |
+ size_t stop_index = |
+ translated_text.find(kCloseTag, start_index + open_tag_len); |
+ string16 chunk; |
+ if (stop_index == std::string::npos) { |
+ // No close tag. Just report as one chunk. |
+ chunk = translated_text; |
+ start_index = std::string::npos; // So we break on next iteration. |
+ } else { |
+ // Now find the next open tag after this close tag. |
+ stop_index = translated_text.find(kOpenTag, stop_index); |
+ if (stop_index != std::string::npos) { |
+ chunk = translated_text.substr(start_index, stop_index - start_index); |
+ start_index = stop_index; |
+ } else { |
+ chunk = translated_text.substr(start_index); |
+ start_index = std::string::npos; // So we break on next iteration. |
+ } |
+ } |
+ chunk = RemoveTag(chunk); |
+ // The translation server leaves some ampersand character in the |
+ // translation. |
+ chunk = UnescapeForHTML(chunk); |
+ text_chunks->push_back(RemoveTag(chunk)); |
+ } |
+} |
+ |
+// static |
+string16 TranslationService::RemoveTag(const string16& text) { |
+ // Remove any anchor tags, knowing they could be extra/unbalanced tags. |
+ const string16 kStartTag(ASCIIToUTF16("<a ")); |
+ const string16 kEndTag(ASCIIToUTF16("</a>")); |
+ const string16 kGreaterThan(ASCIIToUTF16(">")); |
+ const string16 kLessThan(ASCIIToUTF16("<")); |
+ |
+ string16 result; |
+ size_t start_index = text.find(kStartTag); |
+ if (start_index == std::string::npos) { |
+ result = text; |
+ } else { |
+ bool first_iter = true; |
+ while (true) { |
+ size_t stop_index = text.find(kGreaterThan, start_index); |
+ size_t next_tag_index = text.find(kLessThan, start_index + 1); |
+ // Ignore unclosed <a tag. (Ignore subsequent closing tags, they'll be |
+ // removed in the next loop.) |
+ if (stop_index == std::string::npos || |
+ (next_tag_index != std::string::npos && |
+ stop_index > next_tag_index)) { |
+ result.append(text.substr(start_index)); |
+ break; |
+ } |
+ if (start_index > 0 && first_iter) |
+ result = text.substr(0, start_index); |
+ start_index = text.find(kStartTag, start_index + 1); |
+ if (start_index == std::string::npos) { |
+ result += text.substr(stop_index + 1); |
+ break; |
+ } |
+ result += text.substr(stop_index + 1, start_index - stop_index - 1); |
+ first_iter = false; |
+ } |
+ } |
+ |
+ // Now remove </a> tags. |
+ ReplaceSubstringsAfterOffset(&result, 0, |
+ ASCIIToUTF16("</a>"), ASCIIToUTF16("")); |
+ return result; |
+} |
+ |
+// static |
+void TranslationService::AddTextToRequestString(std::string* request, |
+ const std::string& text, |
+ const std::string& source_lang, |
+ const std::string& target_lang, |
+ bool secure) { |
+ if (request->empty()) { |
+ // First request, add required parameters. |
+ request->append(kVersionParam); |
+ request->append("=1.0&"); |
+ request->append(kClientParam); |
+ request->append("=cr&"); // cr = Chrome. |
+ request->append(kFormatParam); |
+ request->append("=html&"); |
+ request->append(kLangPairParam); |
+ request->append("="); |
+ request->append(source_lang); |
+ request->append("%7C"); // | URL encoded. |
+ request->append(target_lang); |
+ if (secure) { |
+ request->append("&"); |
+ request->append(kSSLParam); |
+ request->append("=1"); |
+ } |
+ } |
+ request->append("&"); |
+ request->append(kTextParam); |
+ request->append("="); |
+ request->append(text); |
+} |