OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/render_view.h" | 5 #include "chrome/renderer/render_view.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <string> | 8 #include <string> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
(...skipping 337 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
348 std::string new_extension = | 348 std::string new_extension = |
349 RenderThread::current()->GetExtensionIdForURL(new_url); | 349 RenderThread::current()->GetExtensionIdForURL(new_url); |
350 return (old_extension != new_extension); | 350 return (old_extension != new_extension); |
351 } | 351 } |
352 | 352 |
353 // Returns the ISO 639-1 language code of the specified |text|, or 'unknown' | 353 // Returns the ISO 639-1 language code of the specified |text|, or 'unknown' |
354 // if it failed. | 354 // if it failed. |
355 // | 355 // |
356 // Note this only works on Windows at this time. It always returns 'unknown' | 356 // Note this only works on Windows at this time. It always returns 'unknown' |
357 // on other platforms. | 357 // on other platforms. |
358 static std::string DetermineTextLanguage(const std::wstring& text) { | 358 static std::string DetermineTextLanguage(const string16& text) { |
359 // Text shorter than 100 characters will probably not provide good results. | 359 // Text shorter than 100 characters will probably not provide good results. |
360 // Report it as unknown language. | 360 // Report it as unknown language. |
361 if (text.length() < 100) | 361 if (text.length() < 100) |
362 return RenderView::kUnknownLanguageCode; | 362 return RenderView::kUnknownLanguageCode; |
363 | 363 |
364 std::string language = RenderView::kUnknownLanguageCode; | 364 std::string language = RenderView::kUnknownLanguageCode; |
365 int num_languages = 0; | 365 int num_languages = 0; |
366 bool is_reliable = false; | 366 bool is_reliable = false; |
367 string16 input = WideToUTF16(text); | |
368 Language cld_language = | 367 Language cld_language = |
369 DetectLanguageOfUnicodeText(NULL, input.c_str(), true, &is_reliable, | 368 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, |
370 &num_languages, NULL); | 369 &num_languages, NULL); |
371 if (is_reliable && cld_language != NUM_LANGUAGES && | 370 if (is_reliable && cld_language != NUM_LANGUAGES && |
372 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { | 371 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { |
373 // We should not use LanguageCode_ISO_639_1 because it does not cover all | 372 // We should not use LanguageCode_ISO_639_1 because it does not cover all |
374 // the languages CLD can detect. As a result, it'll return the invalid | 373 // the languages CLD can detect. As a result, it'll return the invalid |
375 // language code for traditional Chinese among others. | 374 // language code for traditional Chinese among others. |
376 // |LanguageCodeWithDialects| will go through ISO 639-1, ISO 639-2 and | 375 // |LanguageCodeWithDialects| will go through ISO 639-1, ISO 639-2 and |
377 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN | 376 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN |
378 // for Simplified Chinese. | 377 // for Simplified Chinese. |
379 language = LanguageCodeWithDialects(cld_language); | 378 language = LanguageCodeWithDialects(cld_language); |
(...skipping 435 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
815 | 814 |
816 if (!preliminary_capture) | 815 if (!preliminary_capture) |
817 last_indexed_page_id_ = load_id; | 816 last_indexed_page_id_ = load_id; |
818 | 817 |
819 // Get the URL for this page. | 818 // Get the URL for this page. |
820 GURL url(main_frame->url()); | 819 GURL url(main_frame->url()); |
821 if (url.is_empty()) | 820 if (url.is_empty()) |
822 return; | 821 return; |
823 | 822 |
824 // Retrieve the frame's full text. | 823 // Retrieve the frame's full text. |
825 std::wstring contents; | 824 string16 contents; |
826 CaptureText(main_frame, &contents); | 825 CaptureText(main_frame, &contents); |
827 if (contents.size()) { | 826 if (contents.size()) { |
828 base::TimeTicks begin_time = base::TimeTicks::Now(); | 827 base::TimeTicks begin_time = base::TimeTicks::Now(); |
829 std::string language = DetermineTextLanguage(contents); | 828 std::string language = DetermineTextLanguage(contents); |
830 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", | 829 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", |
831 base::TimeTicks::Now() - begin_time); | 830 base::TimeTicks::Now() - begin_time); |
832 | 831 |
833 // Send the text to the browser for indexing (the browser might decide not | 832 // Send the text to the browser for indexing (the browser might decide not |
834 // to index, if the URL is HTTPS for instance) and language discovery. | 833 // to index, if the URL is HTTPS for instance) and language discovery. |
835 Send(new ViewHostMsg_PageContents(routing_id_, url, load_id, contents, | 834 Send(new ViewHostMsg_PageContents(routing_id_, url, load_id, contents, |
836 language)); | 835 language)); |
837 } | 836 } |
838 | 837 |
839 OnCaptureThumbnail(); | 838 OnCaptureThumbnail(); |
840 } | 839 } |
841 | 840 |
842 void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) { | 841 void RenderView::CaptureText(WebFrame* frame, string16* contents) { |
843 contents->clear(); | 842 contents->clear(); |
844 if (!frame) | 843 if (!frame) |
845 return; | 844 return; |
846 | 845 |
847 #ifdef TIME_TEXT_RETRIEVAL | 846 #ifdef TIME_TEXT_RETRIEVAL |
848 double begin = time_util::GetHighResolutionTimeNow(); | 847 double begin = time_util::GetHighResolutionTimeNow(); |
849 #endif | 848 #endif |
850 | 849 |
851 // get the contents of the frame | 850 // get the contents of the frame |
852 *contents = UTF16ToWideHack(frame->contentAsText(kMaxIndexChars)); | 851 *contents = frame->contentAsText(kMaxIndexChars); |
853 | 852 |
854 #ifdef TIME_TEXT_RETRIEVAL | 853 #ifdef TIME_TEXT_RETRIEVAL |
855 double end = time_util::GetHighResolutionTimeNow(); | 854 double end = time_util::GetHighResolutionTimeNow(); |
856 char buf[128]; | 855 char buf[128]; |
857 sprintf_s(buf, "%d chars retrieved for indexing in %gms\n", | 856 sprintf_s(buf, "%d chars retrieved for indexing in %gms\n", |
858 contents.size(), (end - begin)*1000); | 857 contents.size(), (end - begin)*1000); |
859 OutputDebugStringA(buf); | 858 OutputDebugStringA(buf); |
860 #endif | 859 #endif |
861 | 860 |
862 // When the contents are clipped to the maximum, we don't want to have a | 861 // When the contents are clipped to the maximum, we don't want to have a |
863 // partial word indexed at the end that might have been clipped. Therefore, | 862 // partial word indexed at the end that might have been clipped. Therefore, |
864 // terminate the string at the last space to ensure no words are clipped. | 863 // terminate the string at the last space to ensure no words are clipped. |
865 if (contents->size() == kMaxIndexChars) { | 864 if (contents->size() == kMaxIndexChars) { |
866 size_t last_space_index = contents->find_last_of(kWhitespaceWide); | 865 size_t last_space_index = contents->find_last_of(kWhitespaceUTF16); |
867 if (last_space_index == std::wstring::npos) | 866 if (last_space_index == std::wstring::npos) |
868 return; // don't index if we got a huge block of text with no spaces | 867 return; // don't index if we got a huge block of text with no spaces |
869 contents->resize(last_space_index); | 868 contents->resize(last_space_index); |
870 } | 869 } |
871 } | 870 } |
872 | 871 |
873 bool RenderView::CaptureThumbnail(WebView* view, | 872 bool RenderView::CaptureThumbnail(WebView* view, |
874 int w, | 873 int w, |
875 int h, | 874 int h, |
876 SkBitmap* thumbnail, | 875 SkBitmap* thumbnail, |
(...skipping 4235 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5112 webkit_glue::FormData form; | 5111 webkit_glue::FormData form; |
5113 const WebInputElement element = node.toConst<WebInputElement>(); | 5112 const WebInputElement element = node.toConst<WebInputElement>(); |
5114 if (!form_manager_.FindFormWithFormControlElement( | 5113 if (!form_manager_.FindFormWithFormControlElement( |
5115 element, FormManager::REQUIRE_NONE, &form)) | 5114 element, FormManager::REQUIRE_NONE, &form)) |
5116 return; | 5115 return; |
5117 | 5116 |
5118 autofill_action_ = action; | 5117 autofill_action_ = action; |
5119 Send(new ViewHostMsg_FillAutoFillFormData( | 5118 Send(new ViewHostMsg_FillAutoFillFormData( |
5120 routing_id_, autofill_query_id_, form, value, label)); | 5119 routing_id_, autofill_query_id_, form, value, label)); |
5121 } | 5120 } |
OLD | NEW |