Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(52)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc

Issue 146053007: Update safe_browsing code to use WebElementCollection instead of WebNodeCollection (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebase Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/renderer/safe_browsing/malware_dom_details.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h" 5 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
6 6
7 #include "base/bind.h" 7 #include "base/bind.h"
8 #include "base/compiler_specific.h" 8 #include "base/compiler_specific.h"
9 #include "base/containers/hash_tables.h" 9 #include "base/containers/hash_tables.h"
10 #include "base/logging.h" 10 #include "base/logging.h"
11 #include "base/message_loop/message_loop.h" 11 #include "base/message_loop/message_loop.h"
12 #include "base/metrics/histogram.h" 12 #include "base/metrics/histogram.h"
13 #include "base/strings/string_util.h" 13 #include "base/strings/string_util.h"
14 #include "base/time/time.h" 14 #include "base/time/time.h"
15 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" 15 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
16 #include "chrome/renderer/safe_browsing/features.h" 16 #include "chrome/renderer/safe_browsing/features.h"
17 #include "content/public/renderer/render_view.h" 17 #include "content/public/renderer/render_view.h"
18 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 18 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
19 #include "third_party/WebKit/public/platform/WebString.h" 19 #include "third_party/WebKit/public/platform/WebString.h"
20 #include "third_party/WebKit/public/web/WebElement.h" 20 #include "third_party/WebKit/public/web/WebElement.h"
21 #include "third_party/WebKit/public/web/WebElementCollection.h"
21 #include "third_party/WebKit/public/web/WebFrame.h" 22 #include "third_party/WebKit/public/web/WebFrame.h"
22 #include "third_party/WebKit/public/web/WebNodeCollection.h"
23 #include "third_party/WebKit/public/web/WebView.h" 23 #include "third_party/WebKit/public/web/WebView.h"
24 24
25 namespace safe_browsing { 25 namespace safe_browsing {
26 26
27 // This time should be short enough that it doesn't noticeably disrupt the 27 // This time should be short enough that it doesn't noticeably disrupt the
28 // user's interaction with the page. 28 // user's interaction with the page.
29 const int PhishingDOMFeatureExtractor::kMaxTimePerChunkMs = 10; 29 const int PhishingDOMFeatureExtractor::kMaxTimePerChunkMs = 10;
30 30
31 // Experimenting shows that we get a reasonable gain in performance by 31 // Experimenting shows that we get a reasonable gain in performance by
32 // increasing this up to around 10, but there's not much benefit in 32 // increasing this up to around 10, but there's not much benefit in
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
85 start_time(start_time_ticks), 85 start_time(start_time_ticks),
86 num_iterations(0) {} 86 num_iterations(0) {}
87 87
88 ~PageFeatureState() {} 88 ~PageFeatureState() {}
89 }; 89 };
90 90
91 // Per-frame state 91 // Per-frame state
92 struct PhishingDOMFeatureExtractor::FrameData { 92 struct PhishingDOMFeatureExtractor::FrameData {
93 // This is our reference to document.all, which is an iterator over all 93 // This is our reference to document.all, which is an iterator over all
94 // of the elements in the document. It keeps track of our current position. 94 // of the elements in the document. It keeps track of our current position.
95 blink::WebNodeCollection elements; 95 blink::WebElementCollection elements;
96 // The domain of the document URL, stored here so that we don't need to 96 // The domain of the document URL, stored here so that we don't need to
97 // recompute it every time it's needed. 97 // recompute it every time it's needed.
98 std::string domain; 98 std::string domain;
99 }; 99 };
100 100
101 PhishingDOMFeatureExtractor::PhishingDOMFeatureExtractor( 101 PhishingDOMFeatureExtractor::PhishingDOMFeatureExtractor(
102 content::RenderView* render_view, 102 content::RenderView* render_view,
103 FeatureExtractorClock* clock) 103 FeatureExtractorClock* clock)
104 : render_view_(render_view), 104 : render_view_(render_view),
105 clock_(clock), 105 clock_(clock),
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
151 151
152 if (cur_document_.isNull()) { 152 if (cur_document_.isNull()) {
153 // This will only happen if we weren't able to get the document for the 153 // This will only happen if we weren't able to get the document for the
154 // main frame. We'll treat this as an extraction failure. 154 // main frame. We'll treat this as an extraction failure.
155 RunCallback(false); 155 RunCallback(false);
156 return; 156 return;
157 } 157 }
158 158
159 int num_elements = 0; 159 int num_elements = 0;
160 for (; !cur_document_.isNull(); cur_document_ = GetNextDocument()) { 160 for (; !cur_document_.isNull(); cur_document_ = GetNextDocument()) {
161 blink::WebNode cur_node; 161 blink::WebElement cur_element;
162 if (cur_frame_data_.get()) { 162 if (cur_frame_data_.get()) {
163 // We're resuming traversal of a frame, so just advance to the next node. 163 // We're resuming traversal of a frame, so just advance to the next
164 cur_node = cur_frame_data_->elements.nextItem(); 164 // element.
165 cur_element = cur_frame_data_->elements.nextItem();
165 // When we resume the traversal, the first call to nextItem() potentially 166 // When we resume the traversal, the first call to nextItem() potentially
166 // has to walk through the document again from the beginning, if it was 167 // has to walk through the document again from the beginning, if it was
167 // modified between our chunks of work. Log how long this takes, so we 168 // modified between our chunks of work. Log how long this takes, so we
168 // can tell if it's too slow. 169 // can tell if it's too slow.
169 UMA_HISTOGRAM_TIMES("SBClientPhishing.DOMFeatureResumeTime", 170 UMA_HISTOGRAM_TIMES("SBClientPhishing.DOMFeatureResumeTime",
170 clock_->Now() - current_chunk_start_time); 171 clock_->Now() - current_chunk_start_time);
171 } else { 172 } else {
172 // We just moved to a new frame, so update our frame state 173 // We just moved to a new frame, so update our frame state
173 // and advance to the first element. 174 // and advance to the first element.
174 ResetFrameData(); 175 ResetFrameData();
175 cur_node = cur_frame_data_->elements.firstItem(); 176 cur_element = cur_frame_data_->elements.firstItem();
176 } 177 }
177 178
178 for (; !cur_node.isNull(); 179 for (; !cur_element.isNull();
179 cur_node = cur_frame_data_->elements.nextItem()) { 180 cur_element = cur_frame_data_->elements.nextItem()) {
180 if (!cur_node.isElementNode()) { 181 if (cur_element.hasTagName("a")) {
181 continue; 182 HandleLink(cur_element);
182 } 183 } else if (cur_element.hasTagName("form")) {
183 blink::WebElement element = cur_node.to<blink::WebElement>(); 184 HandleForm(cur_element);
184 if (element.hasTagName("a")) { 185 } else if (cur_element.hasTagName("img")) {
185 HandleLink(element); 186 HandleImage(cur_element);
186 } else if (element.hasTagName("form")) { 187 } else if (cur_element.hasTagName("input")) {
187 HandleForm(element); 188 HandleInput(cur_element);
188 } else if (element.hasTagName("img")) { 189 } else if (cur_element.hasTagName("script")) {
189 HandleImage(element); 190 HandleScript(cur_element);
190 } else if (element.hasTagName("input")) {
191 HandleInput(element);
192 } else if (element.hasTagName("script")) {
193 HandleScript(element);
194 } 191 }
195 192
196 if (++num_elements >= kClockCheckGranularity) { 193 if (++num_elements >= kClockCheckGranularity) {
197 num_elements = 0; 194 num_elements = 0;
198 base::TimeTicks now = clock_->Now(); 195 base::TimeTicks now = clock_->Now();
199 if (now - page_feature_state_->start_time >= 196 if (now - page_feature_state_->start_time >=
200 base::TimeDelta::FromMilliseconds(kMaxTotalTimeMs)) { 197 base::TimeDelta::FromMilliseconds(kMaxTotalTimeMs)) {
201 DLOG(ERROR) << "Feature extraction took too long, giving up"; 198 DLOG(ERROR) << "Feature extraction took too long, giving up";
202 // We expect this to happen infrequently, so record when it does. 199 // We expect this to happen infrequently, so record when it does.
203 UMA_HISTOGRAM_COUNTS("SBClientPhishing.DOMFeatureTimeout", 1); 200 UMA_HISTOGRAM_COUNTS("SBClientPhishing.DOMFeatureTimeout", 1);
(...skipping 287 matching lines...) Expand 10 before | Expand all | Expand 10 after
491 // Record number of script tags (discretized for numerical stability.) 488 // Record number of script tags (discretized for numerical stability.)
492 if (page_feature_state_->num_script_tags > 1) { 489 if (page_feature_state_->num_script_tags > 1) {
493 features_->AddBooleanFeature(features::kPageNumScriptTagsGTOne); 490 features_->AddBooleanFeature(features::kPageNumScriptTagsGTOne);
494 if (page_feature_state_->num_script_tags > 6) { 491 if (page_feature_state_->num_script_tags > 6) {
495 features_->AddBooleanFeature(features::kPageNumScriptTagsGTSix); 492 features_->AddBooleanFeature(features::kPageNumScriptTagsGTSix);
496 } 493 }
497 } 494 }
498 } 495 }
499 496
500 } // namespace safe_browsing 497 } // namespace safe_browsing
OLDNEW
« no previous file with comments | « chrome/renderer/safe_browsing/malware_dom_details.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698