| OLD | NEW |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/render_view.h" | 5 #include "chrome/renderer/render_view.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <string> | 8 #include <string> |
| 9 #include <vector> | 9 #include <vector> |
| 10 | 10 |
| (...skipping 337 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 348 std::string new_extension = | 348 std::string new_extension = |
| 349 RenderThread::current()->GetExtensionIdForURL(new_url); | 349 RenderThread::current()->GetExtensionIdForURL(new_url); |
| 350 return (old_extension != new_extension); | 350 return (old_extension != new_extension); |
| 351 } | 351 } |
| 352 | 352 |
| 353 // Returns the ISO 639-1 language code of the specified |text|, or 'unknown' | 353 // Returns the ISO 639-1 language code of the specified |text|, or 'unknown' |
| 354 // if it failed. | 354 // if it failed. |
| 355 // | 355 // |
| 356 // Note this only works on Windows at this time. It always returns 'unknown' | 356 // Note this only works on Windows at this time. It always returns 'unknown' |
| 357 // on other platforms. | 357 // on other platforms. |
| 358 static std::string DetermineTextLanguage(const string16& text) { | 358 static std::string DetermineTextLanguage(const std::wstring& text) { |
| 359 // Text with fewer than 100 characters will probably not provide good results. | 359 // Text with fewer than 100 characters will probably not provide good results. |
| 360 // Report it as unknown language. | 360 // Report it as unknown language. |
| 361 if (text.length() < 100) | 361 if (text.length() < 100) |
| 362 return RenderView::kUnknownLanguageCode; | 362 return RenderView::kUnknownLanguageCode; |
| 363 | 363 |
| 364 std::string language = RenderView::kUnknownLanguageCode; | 364 std::string language = RenderView::kUnknownLanguageCode; |
| 365 int num_languages = 0; | 365 int num_languages = 0; |
| 366 bool is_reliable = false; | 366 bool is_reliable = false; |
| 367 string16 input = WideToUTF16(text); |
| 367 Language cld_language = | 368 Language cld_language = |
| 368 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, | 369 DetectLanguageOfUnicodeText(NULL, input.c_str(), true, &is_reliable, |
| 369 &num_languages, NULL); | 370 &num_languages, NULL); |
| 370 if (is_reliable && cld_language != NUM_LANGUAGES && | 371 if (is_reliable && cld_language != NUM_LANGUAGES && |
| 371 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { | 372 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { |
| 372 // We should not use LanguageCode_ISO_639_1 because it does not cover all | 373 // We should not use LanguageCode_ISO_639_1 because it does not cover all |
| 373 // the languages CLD can detect. As a result, it'll return the invalid | 374 // the languages CLD can detect. As a result, it'll return the invalid |
| 374 // language code for traditional Chinese among others. | 375 // language code for traditional Chinese among others. |
| 375 // |LanguageCodeWithDialects| will go through ISO 639-1, ISO 639-2 and | 376 // |LanguageCodeWithDialects| will go through ISO 639-1, ISO 639-2 and |
| 376 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN | 377 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN |
| 377 // for Simplified Chinese. | 378 // for Simplified Chinese. |
| 378 language = LanguageCodeWithDialects(cld_language); | 379 language = LanguageCodeWithDialects(cld_language); |
| (...skipping 441 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 820 | 821 |
| 821 if (!preliminary_capture) | 822 if (!preliminary_capture) |
| 822 last_indexed_page_id_ = load_id; | 823 last_indexed_page_id_ = load_id; |
| 823 | 824 |
| 824 // Get the URL for this page. | 825 // Get the URL for this page. |
| 825 GURL url(main_frame->url()); | 826 GURL url(main_frame->url()); |
| 826 if (url.is_empty()) | 827 if (url.is_empty()) |
| 827 return; | 828 return; |
| 828 | 829 |
| 829 // Retrieve the frame's full text. | 830 // Retrieve the frame's full text. |
| 830 string16 contents; | 831 std::wstring contents; |
| 831 CaptureText(main_frame, &contents); | 832 CaptureText(main_frame, &contents); |
| 832 if (contents.size()) { | 833 if (contents.size()) { |
| 833 base::TimeTicks begin_time = base::TimeTicks::Now(); | 834 base::TimeTicks begin_time = base::TimeTicks::Now(); |
| 834 std::string language = DetermineTextLanguage(contents); | 835 std::string language = DetermineTextLanguage(contents); |
| 835 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", | 836 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", |
| 836 base::TimeTicks::Now() - begin_time); | 837 base::TimeTicks::Now() - begin_time); |
| 837 | 838 |
| 838 // Send the text to the browser for indexing (the browser might decide not | 839 // Send the text to the browser for indexing (the browser might decide not |
| 839 // to index, if the URL is HTTPS for instance) and language discovery. | 840 // to index, if the URL is HTTPS for instance) and language discovery. |
| 840 Send(new ViewHostMsg_PageContents(routing_id_, url, load_id, contents, | 841 Send(new ViewHostMsg_PageContents(routing_id_, url, load_id, contents, |
| 841 language)); | 842 language)); |
| 842 } | 843 } |
| 843 | 844 |
| 844 OnCaptureThumbnail(); | 845 OnCaptureThumbnail(); |
| 845 } | 846 } |
| 846 | 847 |
| 847 void RenderView::CaptureText(WebFrame* frame, string16* contents) { | 848 void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) { |
| 848 contents->clear(); | 849 contents->clear(); |
| 849 if (!frame) | 850 if (!frame) |
| 850 return; | 851 return; |
| 851 | 852 |
| 852 #ifdef TIME_TEXT_RETRIEVAL | 853 #ifdef TIME_TEXT_RETRIEVAL |
| 853 double begin = time_util::GetHighResolutionTimeNow(); | 854 double begin = time_util::GetHighResolutionTimeNow(); |
| 854 #endif | 855 #endif |
| 855 | 856 |
| 856 // get the contents of the frame | 857 // get the contents of the frame |
| 857 *contents = frame->contentAsText(kMaxIndexChars); | 858 *contents = UTF16ToWideHack(frame->contentAsText(kMaxIndexChars)); |
| 858 | 859 |
| 859 #ifdef TIME_TEXT_RETRIEVAL | 860 #ifdef TIME_TEXT_RETRIEVAL |
| 860 double end = time_util::GetHighResolutionTimeNow(); | 861 double end = time_util::GetHighResolutionTimeNow(); |
| 861 char buf[128]; | 862 char buf[128]; |
| 862 sprintf_s(buf, "%d chars retrieved for indexing in %gms\n", | 863 sprintf_s(buf, "%d chars retrieved for indexing in %gms\n", |
| 863 contents.size(), (end - begin)*1000); | 864 contents.size(), (end - begin)*1000); |
| 864 OutputDebugStringA(buf); | 865 OutputDebugStringA(buf); |
| 865 #endif | 866 #endif |
| 866 | 867 |
| 867 // When the contents are clipped to the maximum, we don't want to have a | 868 // When the contents are clipped to the maximum, we don't want to have a |
| 868 // partial word indexed at the end that might have been clipped. Therefore, | 869 // partial word indexed at the end that might have been clipped. Therefore, |
| 869 // terminate the string at the last space to ensure no words are clipped. | 870 // terminate the string at the last space to ensure no words are clipped. |
| 870 if (contents->size() == kMaxIndexChars) { | 871 if (contents->size() == kMaxIndexChars) { |
| 871 size_t last_space_index = contents->find_last_of(kWhitespaceUTF16); | 872 size_t last_space_index = contents->find_last_of(kWhitespaceWide); |
| 872 if (last_space_index == std::wstring::npos) | 873 if (last_space_index == std::wstring::npos) |
| 873 return; // don't index if we got a huge block of text with no spaces | 874 return; // don't index if we got a huge block of text with no spaces |
| 874 contents->resize(last_space_index); | 875 contents->resize(last_space_index); |
| 875 } | 876 } |
| 876 } | 877 } |
| 877 | 878 |
| 878 bool RenderView::CaptureThumbnail(WebView* view, | 879 bool RenderView::CaptureThumbnail(WebView* view, |
| 879 int w, | 880 int w, |
| 880 int h, | 881 int h, |
| 881 SkBitmap* thumbnail, | 882 SkBitmap* thumbnail, |
| (...skipping 4263 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5145 webkit_glue::FormData form; | 5146 webkit_glue::FormData form; |
| 5146 const WebInputElement element = node.toConst<WebInputElement>(); | 5147 const WebInputElement element = node.toConst<WebInputElement>(); |
| 5147 if (!form_manager_.FindFormWithFormControlElement( | 5148 if (!form_manager_.FindFormWithFormControlElement( |
| 5148 element, FormManager::REQUIRE_NONE, &form)) | 5149 element, FormManager::REQUIRE_NONE, &form)) |
| 5149 return; | 5150 return; |
| 5150 | 5151 |
| 5151 autofill_action_ = action; | 5152 autofill_action_ = action; |
| 5152 Send(new ViewHostMsg_FillAutoFillFormData( | 5153 Send(new ViewHostMsg_FillAutoFillFormData( |
| 5153 routing_id_, autofill_query_id_, form, value, label)); | 5154 routing_id_, autofill_query_id_, form, value, label)); |
| 5154 } | 5155 } |
| OLD | NEW |