| OLD | NEW | 
|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 #include "chrome/renderer/translate/translate_helper.h" | 5 #include "chrome/renderer/translate/translate_helper.h" | 
| 6 | 6 | 
| 7 #include "base/bind.h" | 7 #include "base/bind.h" | 
| 8 #include "base/compiler_specific.h" | 8 #include "base/compiler_specific.h" | 
| 9 #include "base/logging.h" | 9 #include "base/logging.h" | 
| 10 #include "base/message_loop.h" | 10 #include "base/message_loop.h" | 
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 85   // meta tag for "content-language".  This may or may not also | 85   // meta tag for "content-language".  This may or may not also | 
| 86   // have a value derived from the actual Content-Language HTTP | 86   // have a value derived from the actual Content-Language HTTP | 
| 87   // header.  The two actually have different meanings (despite the | 87   // header.  The two actually have different meanings (despite the | 
| 88   // original intent of http-equiv to be an equivalent) with the former | 88   // original intent of http-equiv to be an equivalent) with the former | 
| 89   // being the language of the document and the latter being the | 89   // being the language of the document and the latter being the | 
| 90   // language of the intended audience (a distinction really only | 90   // language of the intended audience (a distinction really only | 
| 91   // relevant for things like language textbooks).  This distinction | 91   // relevant for things like language textbooks).  This distinction | 
| 92   // shouldn't affect translation. | 92   // shouldn't affect translation. | 
| 93   WebDocument document = GetMainFrame()->document(); | 93   WebDocument document = GetMainFrame()->document(); | 
| 94   std::string content_language = document.contentLanguage().utf8(); | 94   std::string content_language = document.contentLanguage().utf8(); | 
| 95   std::string language = DeterminePageLanguage(content_language, contents); | 95   std::string cld_language; | 
|  | 96   bool is_cld_reliable; | 
|  | 97   std::string language = DeterminePageLanguage( | 
|  | 98       content_language, contents, &cld_language, &is_cld_reliable); | 
|  | 99 | 
| 96   if (language.empty()) | 100   if (language.empty()) | 
| 97     return; | 101     return; | 
| 98 | 102 | 
|  | 103   GURL url(document.url()); | 
|  | 104   LanguageDetectionDetails details; | 
|  | 105   details.time = base::Time::Now(); | 
|  | 106   details.url = url; | 
|  | 107   details.content_language = content_language; | 
|  | 108   details.cld_language = cld_language; | 
|  | 109   details.is_cld_reliable = is_cld_reliable; | 
|  | 110   details.adopted_language = language; | 
|  | 111 | 
| 99   Send(new ChromeViewHostMsg_TranslateLanguageDetermined( | 112   Send(new ChromeViewHostMsg_TranslateLanguageDetermined( | 
| 100       routing_id(), language, IsPageTranslatable(&document))); | 113       routing_id(), details, IsPageTranslatable(&document))); | 
| 101 } | 114 } | 
| 102 | 115 | 
| 103 void TranslateHelper::CancelPendingTranslation() { | 116 void TranslateHelper::CancelPendingTranslation() { | 
| 104   weak_method_factory_.InvalidateWeakPtrs(); | 117   weak_method_factory_.InvalidateWeakPtrs(); | 
| 105   translation_pending_ = false; | 118   translation_pending_ = false; | 
| 106   page_id_ = -1; | 119   page_id_ = -1; | 
| 107   source_lang_.clear(); | 120   source_lang_.clear(); | 
| 108   target_lang_.clear(); | 121   target_lang_.clear(); | 
| 109 } | 122 } | 
| 110 | 123 | 
| 111 #if defined(ENABLE_LANGUAGE_DETECTION) | 124 #if defined(ENABLE_LANGUAGE_DETECTION) | 
| 112 // static | 125 // static | 
| 113 std::string TranslateHelper::DetermineTextLanguage(const string16& text) { | 126 std::string TranslateHelper::DetermineTextLanguage(const string16& text, | 
|  | 127                                                    bool* is_cld_reliable) { | 
| 114   std::string language = chrome::kUnknownLanguageCode; | 128   std::string language = chrome::kUnknownLanguageCode; | 
| 115   int num_languages = 0; | 129   int num_languages = 0; | 
| 116   int text_bytes = 0; | 130   int text_bytes = 0; | 
| 117   bool is_reliable = false; | 131   bool is_reliable = false; | 
| 118   Language cld_language = | 132   Language cld_language = | 
| 119       DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, | 133       DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, | 
| 120                                   &num_languages, NULL, &text_bytes); | 134                                   &num_languages, NULL, &text_bytes); | 
|  | 135   if (is_cld_reliable != NULL) | 
|  | 136     *is_cld_reliable = is_reliable; | 
|  | 137 | 
| 121   // We don't trust the result if the CLD reports that the detection is not | 138   // We don't trust the result if the CLD reports that the detection is not | 
| 122   // reliable, or if the actual text used to detect the language was less than | 139   // reliable, or if the actual text used to detect the language was less than | 
| 123   // 100 bytes (short texts can often lead to wrong results). | 140   // 100 bytes (short texts can often lead to wrong results). | 
| 124   // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that | 141   // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that | 
| 125   // the determined language code is correct with 50% confidence. Chrome should | 142   // the determined language code is correct with 50% confidence. Chrome should | 
| 126   // handle the real confidence value to judge. | 143   // handle the real confidence value to judge. | 
| 127   if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && | 144   if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && | 
| 128       cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { | 145       cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { | 
| 129     // We should not use LanguageCode_ISO_639_1 because it does not cover all | 146     // We should not use LanguageCode_ISO_639_1 because it does not cover all | 
| 130     // the languages CLD can detect. As a result, it'll return the invalid | 147     // the languages CLD can detect. As a result, it'll return the invalid | 
| (...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 289   size_t dash_index = code->find('-'); | 306   size_t dash_index = code->find('-'); | 
| 290   if (!(dash_index == 2 && code->size() == 5) && | 307   if (!(dash_index == 2 && code->size() == 5) && | 
| 291       !(dash_index == std::string::npos && code->size() == 2)) { | 308       !(dash_index == std::string::npos && code->size() == 2)) { | 
| 292     // Reset |code| to ignore the invalid code. | 309     // Reset |code| to ignore the invalid code. | 
| 293     *code = std::string(); | 310     *code = std::string(); | 
| 294   } | 311   } | 
| 295 } | 312 } | 
| 296 | 313 | 
| 297 // static | 314 // static | 
| 298 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, | 315 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, | 
| 299                                                    const string16& contents) { | 316                                                    const string16& contents, | 
|  | 317                                                    std::string* cld_language_p, | 
|  | 318                                                    bool* is_cld_reliable_p) { | 
| 300 #if defined(ENABLE_LANGUAGE_DETECTION) | 319 #if defined(ENABLE_LANGUAGE_DETECTION) | 
| 301   base::TimeTicks begin_time = base::TimeTicks::Now(); | 320   base::TimeTicks begin_time = base::TimeTicks::Now(); | 
| 302   std::string cld_language = DetermineTextLanguage(contents); | 321   bool is_cld_reliable; | 
|  | 322   std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); | 
|  | 323 | 
|  | 324   if (cld_language_p != NULL) | 
|  | 325     *cld_language_p = cld_language; | 
|  | 326   if (is_cld_reliable_p != NULL) | 
|  | 327     *is_cld_reliable_p = is_cld_reliable; | 
|  | 328 | 
| 303   UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", | 329   UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", | 
| 304                              base::TimeTicks::Now() - begin_time); | 330                              base::TimeTicks::Now() - begin_time); | 
| 305   ConvertLanguageCodeSynonym(&cld_language); | 331   ConvertLanguageCodeSynonym(&cld_language); | 
| 306   VLOG(9) << "CLD determined language code: " << cld_language; | 332   VLOG(9) << "CLD determined language code: " << cld_language; | 
| 307 #endif  // defined(ENABLE_LANGUAGE_DETECTION) | 333 #endif  // defined(ENABLE_LANGUAGE_DETECTION) | 
| 308 | 334 | 
| 309   // Correct well-known format errors. | 335   // Correct well-known format errors. | 
| 310   std::string language = code; | 336   std::string language = code; | 
| 311   CorrectLanguageCodeTypo(&language); | 337   CorrectLanguageCodeTypo(&language); | 
| 312 | 338 | 
| (...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 532   WebView* web_view = render_view()->GetWebView(); | 558   WebView* web_view = render_view()->GetWebView(); | 
| 533   if (!web_view) { | 559   if (!web_view) { | 
| 534     // When the WebView is going away, the render view should have called | 560     // When the WebView is going away, the render view should have called | 
| 535     // CancelPendingTranslation() which should have stopped any pending work, so | 561     // CancelPendingTranslation() which should have stopped any pending work, so | 
| 536     // that case should not happen. | 562     // that case should not happen. | 
| 537     NOTREACHED(); | 563     NOTREACHED(); | 
| 538     return NULL; | 564     return NULL; | 
| 539   } | 565   } | 
| 540   return web_view->mainFrame(); | 566   return web_view->mainFrame(); | 
| 541 } | 567 } | 
| OLD | NEW | 
|---|