Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(200)

Side by Side Diff: chrome/renderer/render_view.cc

Issue 2802010: Revert 49594 - Convert page contents grabbing from wide to UTF16. The current... (Closed) Base URL: svn://svn.chromium.org/chrome/branches/437/src/
Patch Set: Created 10 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/renderer/render_view.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/render_view.h" 5 #include "chrome/renderer/render_view.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <string> 8 #include <string>
9 #include <vector> 9 #include <vector>
10 10
(...skipping 337 matching lines...) Expand 10 before | Expand all | Expand 10 after
348 std::string new_extension = 348 std::string new_extension =
349 RenderThread::current()->GetExtensionIdForURL(new_url); 349 RenderThread::current()->GetExtensionIdForURL(new_url);
350 return (old_extension != new_extension); 350 return (old_extension != new_extension);
351 } 351 }
352 352
353 // Returns the ISO 639-1 language code of the specified |text|, or 'unknown' 353 // Returns the ISO 639-1 language code of the specified |text|, or 'unknown'
354 // if it failed. 354 // if it failed.
355 // 355 //
356 // Note this only works on Windows at this time. It always returns 'unknown' 356 // Note this only works on Windows at this time. It always returns 'unknown'
357 // on other platforms. 357 // on other platforms.
358 static std::string DetermineTextLanguage(const string16& text) { 358 static std::string DetermineTextLanguage(const std::wstring& text) {
359 // Text with fewer than 100 characters will probably not provide good results. 359 // Text with fewer than 100 characters will probably not provide good results.
360 // Report it as unknown language. 360 // Report it as unknown language.
361 if (text.length() < 100) 361 if (text.length() < 100)
362 return RenderView::kUnknownLanguageCode; 362 return RenderView::kUnknownLanguageCode;
363 363
364 std::string language = RenderView::kUnknownLanguageCode; 364 std::string language = RenderView::kUnknownLanguageCode;
365 int num_languages = 0; 365 int num_languages = 0;
366 bool is_reliable = false; 366 bool is_reliable = false;
367 string16 input = WideToUTF16(text);
367 Language cld_language = 368 Language cld_language =
368 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, 369 DetectLanguageOfUnicodeText(NULL, input.c_str(), true, &is_reliable,
369 &num_languages, NULL); 370 &num_languages, NULL);
370 if (is_reliable && cld_language != NUM_LANGUAGES && 371 if (is_reliable && cld_language != NUM_LANGUAGES &&
371 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { 372 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) {
372 // We should not use LanguageCode_ISO_639_1 because it does not cover all 373 // We should not use LanguageCode_ISO_639_1 because it does not cover all
373 // the languages CLD can detect. As a result, it'll return the invalid 374 // the languages CLD can detect. As a result, it'll return the invalid
374 // language code for traditional Chinese among others. 375 // language code for traditional Chinese among others.
375 // |LanguageCodeWithDialect| will go through ISO 639-1, ISO-639-2 and 376 // |LanguageCodeWithDialect| will go through ISO 639-1, ISO-639-2 and
376 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN 377 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN
377 // for Simplified Chinese. 378 // for Simplified Chinese.
378 language = LanguageCodeWithDialects(cld_language); 379 language = LanguageCodeWithDialects(cld_language);
(...skipping 441 matching lines...) Expand 10 before | Expand all | Expand 10 after
820 821
821 if (!preliminary_capture) 822 if (!preliminary_capture)
822 last_indexed_page_id_ = load_id; 823 last_indexed_page_id_ = load_id;
823 824
824 // Get the URL for this page. 825 // Get the URL for this page.
825 GURL url(main_frame->url()); 826 GURL url(main_frame->url());
826 if (url.is_empty()) 827 if (url.is_empty())
827 return; 828 return;
828 829
829 // Retrieve the frame's full text. 830 // Retrieve the frame's full text.
830 string16 contents; 831 std::wstring contents;
831 CaptureText(main_frame, &contents); 832 CaptureText(main_frame, &contents);
832 if (contents.size()) { 833 if (contents.size()) {
833 base::TimeTicks begin_time = base::TimeTicks::Now(); 834 base::TimeTicks begin_time = base::TimeTicks::Now();
834 std::string language = DetermineTextLanguage(contents); 835 std::string language = DetermineTextLanguage(contents);
835 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", 836 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection",
836 base::TimeTicks::Now() - begin_time); 837 base::TimeTicks::Now() - begin_time);
837 838
838 // Send the text to the browser for indexing (the browser might decide not 839 // Send the text to the browser for indexing (the browser might decide not
839 // to index, if the URL is HTTPS for instance) and language discovery. 840 // to index, if the URL is HTTPS for instance) and language discovery.
840 Send(new ViewHostMsg_PageContents(routing_id_, url, load_id, contents, 841 Send(new ViewHostMsg_PageContents(routing_id_, url, load_id, contents,
841 language)); 842 language));
842 } 843 }
843 844
844 OnCaptureThumbnail(); 845 OnCaptureThumbnail();
845 } 846 }
846 847
847 void RenderView::CaptureText(WebFrame* frame, string16* contents) { 848 void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) {
848 contents->clear(); 849 contents->clear();
849 if (!frame) 850 if (!frame)
850 return; 851 return;
851 852
852 #ifdef TIME_TEXT_RETRIEVAL 853 #ifdef TIME_TEXT_RETRIEVAL
853 double begin = time_util::GetHighResolutionTimeNow(); 854 double begin = time_util::GetHighResolutionTimeNow();
854 #endif 855 #endif
855 856
856 // get the contents of the frame 857 // get the contents of the frame
857 *contents = frame->contentAsText(kMaxIndexChars); 858 *contents = UTF16ToWideHack(frame->contentAsText(kMaxIndexChars));
858 859
859 #ifdef TIME_TEXT_RETRIEVAL 860 #ifdef TIME_TEXT_RETRIEVAL
860 double end = time_util::GetHighResolutionTimeNow(); 861 double end = time_util::GetHighResolutionTimeNow();
861 char buf[128]; 862 char buf[128];
862 sprintf_s(buf, "%d chars retrieved for indexing in %gms\n", 863 sprintf_s(buf, "%d chars retrieved for indexing in %gms\n",
863 contents.size(), (end - begin)*1000); 864 contents.size(), (end - begin)*1000);
864 OutputDebugStringA(buf); 865 OutputDebugStringA(buf);
865 #endif 866 #endif
866 867
867 // When the contents are clipped to the maximum, we don't want to have a 868 // When the contents are clipped to the maximum, we don't want to have a
868 // partial word indexed at the end that might have been clipped. Therefore, 869 // partial word indexed at the end that might have been clipped. Therefore,
869 // terminate the string at the last space to ensure no words are clipped. 870 // terminate the string at the last space to ensure no words are clipped.
870 if (contents->size() == kMaxIndexChars) { 871 if (contents->size() == kMaxIndexChars) {
871 size_t last_space_index = contents->find_last_of(kWhitespaceUTF16); 872 size_t last_space_index = contents->find_last_of(kWhitespaceWide);
872 if (last_space_index == std::wstring::npos) 873 if (last_space_index == std::wstring::npos)
873 return; // don't index if we got a huge block of text with no spaces 874 return; // don't index if we got a huge block of text with no spaces
874 contents->resize(last_space_index); 875 contents->resize(last_space_index);
875 } 876 }
876 } 877 }
877 878
878 bool RenderView::CaptureThumbnail(WebView* view, 879 bool RenderView::CaptureThumbnail(WebView* view,
879 int w, 880 int w,
880 int h, 881 int h,
881 SkBitmap* thumbnail, 882 SkBitmap* thumbnail,
(...skipping 4263 matching lines...) Expand 10 before | Expand all | Expand 10 after
5145 webkit_glue::FormData form; 5146 webkit_glue::FormData form;
5146 const WebInputElement element = node.toConst<WebInputElement>(); 5147 const WebInputElement element = node.toConst<WebInputElement>();
5147 if (!form_manager_.FindFormWithFormControlElement( 5148 if (!form_manager_.FindFormWithFormControlElement(
5148 element, FormManager::REQUIRE_NONE, &form)) 5149 element, FormManager::REQUIRE_NONE, &form))
5149 return; 5150 return;
5150 5151
5151 autofill_action_ = action; 5152 autofill_action_ = action;
5152 Send(new ViewHostMsg_FillAutoFillFormData( 5153 Send(new ViewHostMsg_FillAutoFillFormData(
5153 routing_id_, autofill_query_id_, form, value, label)); 5154 routing_id_, autofill_query_id_, form, value, label));
5154 } 5155 }
OLDNEW
« no previous file with comments | « chrome/renderer/render_view.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698