| OLD | NEW |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/render_view.h" | 5 #include "chrome/renderer/render_view.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <string> | 8 #include <string> |
| 9 #include <vector> | 9 #include <vector> |
| 10 | 10 |
| (...skipping 337 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 348 std::string new_extension = | 348 std::string new_extension = |
| 349 RenderThread::current()->GetExtensionIdForURL(new_url); | 349 RenderThread::current()->GetExtensionIdForURL(new_url); |
| 350 return (old_extension != new_extension); | 350 return (old_extension != new_extension); |
| 351 } | 351 } |
| 352 | 352 |
| 353 // Returns the ISO 639-1 language code of the specified |text|, or 'unknown' | 353 // Returns the ISO 639-1 language code of the specified |text|, or 'unknown' |
| 354 // if it failed. | 354 // if it failed. |
| 355 // | 355 // |
| 356 // Note this only works on Windows at this time. It always returns 'unknown' | 356 // Note this only works on Windows at this time. It always returns 'unknown' |
| 357 // on other platforms. | 357 // on other platforms. |
| 358 static std::string DetermineTextLanguage(const std::wstring& text) { | 358 static std::string DetermineTextLanguage(const string16& text) { |
| 359 // Text with fewer than 100 characters will probably not provide good results. | 359 // Text with fewer than 100 characters will probably not provide good results. |
| 360 // Report it as unknown language. | 360 // Report it as unknown language. |
| 361 if (text.length() < 100) | 361 if (text.length() < 100) |
| 362 return RenderView::kUnknownLanguageCode; | 362 return RenderView::kUnknownLanguageCode; |
| 363 | 363 |
| 364 std::string language = RenderView::kUnknownLanguageCode; | 364 std::string language = RenderView::kUnknownLanguageCode; |
| 365 int num_languages = 0; | 365 int num_languages = 0; |
| 366 bool is_reliable = false; | 366 bool is_reliable = false; |
| 367 string16 input = WideToUTF16(text); | |
| 368 Language cld_language = | 367 Language cld_language = |
| 369 DetectLanguageOfUnicodeText(NULL, input.c_str(), true, &is_reliable, | 368 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, |
| 370 &num_languages, NULL); | 369 &num_languages, NULL); |
| 371 if (is_reliable && cld_language != NUM_LANGUAGES && | 370 if (is_reliable && cld_language != NUM_LANGUAGES && |
| 372 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { | 371 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { |
| 373 // We should not use LanguageCode_ISO_639_1 because it does not cover all | 372 // We should not use LanguageCode_ISO_639_1 because it does not cover all |
| 374 // the languages CLD can detect. As a result, it'll return the invalid | 373 // the languages CLD can detect. As a result, it'll return the invalid |
| 375 // language code for traditional Chinese among others. | 374 // language code for traditional Chinese among others. |
| 376 // |LanguageCodeWithDialect| will go through ISO 639-1, ISO-639-2 and | 375 // |LanguageCodeWithDialect| will go through ISO 639-1, ISO-639-2 and |
| 377 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN | 376 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN |
| 378 // for Simplified Chinese. | 377 // for Simplified Chinese. |
| 379 language = LanguageCodeWithDialects(cld_language); | 378 language = LanguageCodeWithDialects(cld_language); |
| (...skipping 435 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 815 | 814 |
| 816 if (!preliminary_capture) | 815 if (!preliminary_capture) |
| 817 last_indexed_page_id_ = load_id; | 816 last_indexed_page_id_ = load_id; |
| 818 | 817 |
| 819 // Get the URL for this page. | 818 // Get the URL for this page. |
| 820 GURL url(main_frame->url()); | 819 GURL url(main_frame->url()); |
| 821 if (url.is_empty()) | 820 if (url.is_empty()) |
| 822 return; | 821 return; |
| 823 | 822 |
| 824 // Retrieve the frame's full text. | 823 // Retrieve the frame's full text. |
| 825 std::wstring contents; | 824 string16 contents; |
| 826 CaptureText(main_frame, &contents); | 825 CaptureText(main_frame, &contents); |
| 827 if (contents.size()) { | 826 if (contents.size()) { |
| 828 base::TimeTicks begin_time = base::TimeTicks::Now(); | 827 base::TimeTicks begin_time = base::TimeTicks::Now(); |
| 829 std::string language = DetermineTextLanguage(contents); | 828 std::string language = DetermineTextLanguage(contents); |
| 830 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", | 829 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", |
| 831 base::TimeTicks::Now() - begin_time); | 830 base::TimeTicks::Now() - begin_time); |
| 832 | 831 |
| 833 // Send the text to the browser for indexing (the browser might decide not | 832 // Send the text to the browser for indexing (the browser might decide not |
| 834 // to index, if the URL is HTTPS for instance) and language discovery. | 833 // to index, if the URL is HTTPS for instance) and language discovery. |
| 835 Send(new ViewHostMsg_PageContents(routing_id_, url, load_id, contents, | 834 Send(new ViewHostMsg_PageContents(routing_id_, url, load_id, contents, |
| 836 language)); | 835 language)); |
| 837 } | 836 } |
| 838 | 837 |
| 839 OnCaptureThumbnail(); | 838 OnCaptureThumbnail(); |
| 840 } | 839 } |
| 841 | 840 |
| 842 void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) { | 841 void RenderView::CaptureText(WebFrame* frame, string16* contents) { |
| 843 contents->clear(); | 842 contents->clear(); |
| 844 if (!frame) | 843 if (!frame) |
| 845 return; | 844 return; |
| 846 | 845 |
| 847 #ifdef TIME_TEXT_RETRIEVAL | 846 #ifdef TIME_TEXT_RETRIEVAL |
| 848 double begin = time_util::GetHighResolutionTimeNow(); | 847 double begin = time_util::GetHighResolutionTimeNow(); |
| 849 #endif | 848 #endif |
| 850 | 849 |
| 851 // get the contents of the frame | 850 // get the contents of the frame |
| 852 *contents = UTF16ToWideHack(frame->contentAsText(kMaxIndexChars)); | 851 *contents = frame->contentAsText(kMaxIndexChars); |
| 853 | 852 |
| 854 #ifdef TIME_TEXT_RETRIEVAL | 853 #ifdef TIME_TEXT_RETRIEVAL |
| 855 double end = time_util::GetHighResolutionTimeNow(); | 854 double end = time_util::GetHighResolutionTimeNow(); |
| 856 char buf[128]; | 855 char buf[128]; |
| 857 sprintf_s(buf, "%d chars retrieved for indexing in %gms\n", | 856 sprintf_s(buf, "%d chars retrieved for indexing in %gms\n", |
| 858 contents.size(), (end - begin)*1000); | 857 contents.size(), (end - begin)*1000); |
| 859 OutputDebugStringA(buf); | 858 OutputDebugStringA(buf); |
| 860 #endif | 859 #endif |
| 861 | 860 |
| 862 // When the contents are clipped to the maximum, we don't want to have a | 861 // When the contents are clipped to the maximum, we don't want to have a |
| 863 // partial word indexed at the end that might have been clipped. Therefore, | 862 // partial word indexed at the end that might have been clipped. Therefore, |
| 864 // terminate the string at the last space to ensure no words are clipped. | 863 // terminate the string at the last space to ensure no words are clipped. |
| 865 if (contents->size() == kMaxIndexChars) { | 864 if (contents->size() == kMaxIndexChars) { |
| 866 size_t last_space_index = contents->find_last_of(kWhitespaceWide); | 865 size_t last_space_index = contents->find_last_of(kWhitespaceUTF16); |
| 867 if (last_space_index == std::wstring::npos) | 866 if (last_space_index == std::wstring::npos) |
| 868 return; // don't index if we got a huge block of text with no spaces | 867 return; // don't index if we got a huge block of text with no spaces |
| 869 contents->resize(last_space_index); | 868 contents->resize(last_space_index); |
| 870 } | 869 } |
| 871 } | 870 } |
| 872 | 871 |
| 873 bool RenderView::CaptureThumbnail(WebView* view, | 872 bool RenderView::CaptureThumbnail(WebView* view, |
| 874 int w, | 873 int w, |
| 875 int h, | 874 int h, |
| 876 SkBitmap* thumbnail, | 875 SkBitmap* thumbnail, |
| (...skipping 4235 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5112 webkit_glue::FormData form; | 5111 webkit_glue::FormData form; |
| 5113 const WebInputElement element = node.toConst<WebInputElement>(); | 5112 const WebInputElement element = node.toConst<WebInputElement>(); |
| 5114 if (!form_manager_.FindFormWithFormControlElement( | 5113 if (!form_manager_.FindFormWithFormControlElement( |
| 5115 element, FormManager::REQUIRE_NONE, &form)) | 5114 element, FormManager::REQUIRE_NONE, &form)) |
| 5116 return; | 5115 return; |
| 5117 | 5116 |
| 5118 autofill_action_ = action; | 5117 autofill_action_ = action; |
| 5119 Send(new ViewHostMsg_FillAutoFillFormData( | 5118 Send(new ViewHostMsg_FillAutoFillFormData( |
| 5120 routing_id_, autofill_query_id_, form, value, label)); | 5119 routing_id_, autofill_query_id_, form, value, label)); |
| 5121 } | 5120 } |
| OLD | NEW |