OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/render_view.h" | 5 #include "chrome/renderer/render_view.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <string> | 8 #include <string> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
(...skipping 337 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
348 std::string new_extension = | 348 std::string new_extension = |
349 RenderThread::current()->GetExtensionIdForURL(new_url); | 349 RenderThread::current()->GetExtensionIdForURL(new_url); |
350 return (old_extension != new_extension); | 350 return (old_extension != new_extension); |
351 } | 351 } |
352 | 352 |
353 // Returns the ISO 639-1 language code of the specified |text|, or 'unknown' | 353 // Returns the ISO 639-1 language code of the specified |text|, or 'unknown' |
354 // if it failed. | 354 // if it failed. |
355 // | 355 // |
356 // Note this only works on Windows at this time. It always returns 'unknown' | 356 // Note this only works on Windows at this time. It always returns 'unknown' |
357 // on other platforms. | 357 // on other platforms. |
358 static std::string DetermineTextLanguage(const string16& text) { | 358 static std::string DetermineTextLanguage(const std::wstring& text) { |
359 // Text with less than 100 bytes will probably not provide good results. | 359 // Text with less than 100 bytes will probably not provide good results. |
360 // Report it as unknown language. | 360 // Report it as unknown language. |
361 if (text.length() < 100) | 361 if (text.length() < 100) |
362 return RenderView::kUnknownLanguageCode; | 362 return RenderView::kUnknownLanguageCode; |
363 | 363 |
364 std::string language = RenderView::kUnknownLanguageCode; | 364 std::string language = RenderView::kUnknownLanguageCode; |
365 int num_languages = 0; | 365 int num_languages = 0; |
366 bool is_reliable = false; | 366 bool is_reliable = false; |
| 367 string16 input = WideToUTF16(text); |
367 Language cld_language = | 368 Language cld_language = |
368 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, | 369 DetectLanguageOfUnicodeText(NULL, input.c_str(), true, &is_reliable, |
369 &num_languages, NULL); | 370 &num_languages, NULL); |
370 if (is_reliable && cld_language != NUM_LANGUAGES && | 371 if (is_reliable && cld_language != NUM_LANGUAGES && |
371 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { | 372 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { |
372 // We should not use LanguageCode_ISO_639_1 because it does not cover all | 373 // We should not use LanguageCode_ISO_639_1 because it does not cover all |
373 // the languages CLD can detect. As a result, it'll return the invalid | 374 // the languages CLD can detect. As a result, it'll return the invalid |
374 // language code for traditional Chinese among others. | 375 // language code for traditional Chinese among others. |
375 // |LanguageCodeWithDialects| will go through ISO 639-1, ISO 639-2 and | 376 // |LanguageCodeWithDialects| will go through ISO 639-1, ISO 639-2 and |
376 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN | 377 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN |
377 // for Simplified Chinese. | 378 // for Simplified Chinese. |
378 language = LanguageCodeWithDialects(cld_language); | 379 language = LanguageCodeWithDialects(cld_language); |
(...skipping 441 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
820 | 821 |
821 if (!preliminary_capture) | 822 if (!preliminary_capture) |
822 last_indexed_page_id_ = load_id; | 823 last_indexed_page_id_ = load_id; |
823 | 824 |
824 // Get the URL for this page. | 825 // Get the URL for this page. |
825 GURL url(main_frame->url()); | 826 GURL url(main_frame->url()); |
826 if (url.is_empty()) | 827 if (url.is_empty()) |
827 return; | 828 return; |
828 | 829 |
829 // Retrieve the frame's full text. | 830 // Retrieve the frame's full text. |
830 string16 contents; | 831 std::wstring contents; |
831 CaptureText(main_frame, &contents); | 832 CaptureText(main_frame, &contents); |
832 if (contents.size()) { | 833 if (contents.size()) { |
833 base::TimeTicks begin_time = base::TimeTicks::Now(); | 834 base::TimeTicks begin_time = base::TimeTicks::Now(); |
834 std::string language = DetermineTextLanguage(contents); | 835 std::string language = DetermineTextLanguage(contents); |
835 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", | 836 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", |
836 base::TimeTicks::Now() - begin_time); | 837 base::TimeTicks::Now() - begin_time); |
837 | 838 |
838 // Send the text to the browser for indexing (the browser might decide not | 839 // Send the text to the browser for indexing (the browser might decide not |
839 // to index, if the URL is HTTPS for instance) and language discovery. | 840 // to index, if the URL is HTTPS for instance) and language discovery. |
840 Send(new ViewHostMsg_PageContents(routing_id_, url, load_id, contents, | 841 Send(new ViewHostMsg_PageContents(routing_id_, url, load_id, contents, |
841 language)); | 842 language)); |
842 } | 843 } |
843 | 844 |
844 OnCaptureThumbnail(); | 845 OnCaptureThumbnail(); |
845 } | 846 } |
846 | 847 |
// RenderView::CaptureText: copies the text of |frame| into |*contents|.
//
// Reading this listing: each line shows the OLD revision on the left and the
// NEW revision on the right. The NEW revision changes |contents| from
// string16* to std::wstring*.
//
// Behavior visible in this block:
//  - |*contents| is cleared first; if |frame| is NULL the function returns
//    with |*contents| empty.
//  - The text comes from frame->contentAsText(), which is passed
//    kMaxIndexChars as its argument.
//  - If the resulting size equals kMaxIndexChars, the string is cut back to
//    the last whitespace character so that no partial word remains.
847 void RenderView::CaptureText(WebFrame* frame, string16* contents) { | 848 void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) { |
848 contents->clear(); | 849 contents->clear(); |
849 if (!frame) | 850 if (!frame) |
850 return; | 851 return; |
851 | 852 |
852 #ifdef TIME_TEXT_RETRIEVAL | 853 #ifdef TIME_TEXT_RETRIEVAL |
853 double begin = time_util::GetHighResolutionTimeNow(); | 854 double begin = time_util::GetHighResolutionTimeNow(); |
854 #endif | 855 #endif |
855 | 856 |
856 // get the contents of the frame | 857 // get the contents of the frame |
// NEW revision: the value returned by contentAsText() is converted with
// UTF16ToWideHack() before being stored in the std::wstring.
857 *contents = frame->contentAsText(kMaxIndexChars); | 858 *contents = UTF16ToWideHack(frame->contentAsText(kMaxIndexChars)); |
858 | 859 |
// Optional timing instrumentation, compiled only when TIME_TEXT_RETRIEVAL is
// defined.
// NOTE(review): |contents| is a pointer parameter, so `contents.size()` below
// would not compile if TIME_TEXT_RETRIEVAL were defined; it presumably should
// be `contents->size()`. The "%d" conversion also looks mismatched for a
// size value - confirm before enabling this block.
859 #ifdef TIME_TEXT_RETRIEVAL | 860 #ifdef TIME_TEXT_RETRIEVAL |
860 double end = time_util::GetHighResolutionTimeNow(); | 861 double end = time_util::GetHighResolutionTimeNow(); |
861 char buf[128]; | 862 char buf[128]; |
862 sprintf_s(buf, "%d chars retrieved for indexing in %gms\n", | 863 sprintf_s(buf, "%d chars retrieved for indexing in %gms\n", |
863 contents.size(), (end - begin)*1000); | 864 contents.size(), (end - begin)*1000); |
864 OutputDebugStringA(buf); | 865 OutputDebugStringA(buf); |
865 #endif | 866 #endif |
866 | 867 |
867 // When the contents are clipped to the maximum, we don't want to have a | 868 // When the contents are clipped to the maximum, we don't want to have a |
868 // partial word indexed at the end that might have been clipped. Therefore, | 869 // partial word indexed at the end that might have been clipped. Therefore, |
869 // terminate the string at the last space to ensure no words are clipped. | 870 // terminate the string at the last space to ensure no words are clipped. |
// NOTE(review): in the NEW revision this size is measured on the converted
// std::wstring; presumably it still equals kMaxIndexChars when the text was
// clipped - TODO confirm for platforms where the conversion can change length.
870 if (contents->size() == kMaxIndexChars) { | 871 if (contents->size() == kMaxIndexChars) { |
871 size_t last_space_index = contents->find_last_of(kWhitespaceUTF16); | 872 size_t last_space_index = contents->find_last_of(kWhitespaceWide); |
// NOTE(review): the OLD column compares a string16 search result against
// std::wstring::npos; the NEW column makes the string type and npos agree.
872 if (last_space_index == std::wstring::npos) | 873 if (last_space_index == std::wstring::npos) |
// NOTE(review): this early return leaves |*contents| as it was filled above
// (it is not cleared here). A caller that only checks contents.size() would
// still treat the text as present - confirm whether that matches the
// "don't index" intent stated on the next line.
873 return; // don't index if we got a huge block of text with no spaces | 874 return; // don't index if we got a huge block of text with no spaces |
874 contents->resize(last_space_index); | 875 contents->resize(last_space_index); |
875 } | 876 } |
876 } | 877 } |
877 | 878 |
878 bool RenderView::CaptureThumbnail(WebView* view, | 879 bool RenderView::CaptureThumbnail(WebView* view, |
879 int w, | 880 int w, |
880 int h, | 881 int h, |
881 SkBitmap* thumbnail, | 882 SkBitmap* thumbnail, |
(...skipping 4263 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5145 webkit_glue::FormData form; | 5146 webkit_glue::FormData form; |
5146 const WebInputElement element = node.toConst<WebInputElement>(); | 5147 const WebInputElement element = node.toConst<WebInputElement>(); |
5147 if (!form_manager_.FindFormWithFormControlElement( | 5148 if (!form_manager_.FindFormWithFormControlElement( |
5148 element, FormManager::REQUIRE_NONE, &form)) | 5149 element, FormManager::REQUIRE_NONE, &form)) |
5149 return; | 5150 return; |
5150 | 5151 |
5151 autofill_action_ = action; | 5152 autofill_action_ = action; |
5152 Send(new ViewHostMsg_FillAutoFillFormData( | 5153 Send(new ViewHostMsg_FillAutoFillFormData( |
5153 routing_id_, autofill_query_id_, form, value, label)); | 5154 routing_id_, autofill_query_id_, form, value, label)); |
5154 } | 5155 } |
OLD | NEW |