| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h" | 5 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h" |
| 6 | 6 |
| 7 #include "base/compiler_specific.h" | 7 #include "base/compiler_specific.h" |
| 8 #include "base/hash_tables.h" | 8 #include "base/hash_tables.h" |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/message_loop.h" | 10 #include "base/message_loop.h" |
| (...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 163 cur_node = cur_frame_data_->elements.nextItem(); | 163 cur_node = cur_frame_data_->elements.nextItem(); |
| 164 // When we resume the traversal, the first call to nextItem() potentially | 164 // When we resume the traversal, the first call to nextItem() potentially |
| 165 // has to walk through the document again from the beginning, if it was | 165 // has to walk through the document again from the beginning, if it was |
| 166 // modified between our chunks of work. Log how long this takes, so we | 166 // modified between our chunks of work. Log how long this takes, so we |
| 167 // can tell if it's too slow. | 167 // can tell if it's too slow. |
| 168 UMA_HISTOGRAM_TIMES("SBClientPhishing.DOMFeatureResumeTime", | 168 UMA_HISTOGRAM_TIMES("SBClientPhishing.DOMFeatureResumeTime", |
| 169 clock_->Now() - current_chunk_start_time); | 169 clock_->Now() - current_chunk_start_time); |
| 170 } else { | 170 } else { |
| 171 // We just moved to a new frame, so update our frame state | 171 // We just moved to a new frame, so update our frame state |
| 172 // and advance to the first element. | 172 // and advance to the first element. |
| 173 if (!ResetFrameData()) { | 173 ResetFrameData(); |
| 174 // Nothing in this frame, move on to the next one. | |
| 175 DLOG(WARNING) << "No content in frame, skipping"; | |
| 176 continue; | |
| 177 } | |
| 178 cur_node = cur_frame_data_->elements.firstItem(); | 174 cur_node = cur_frame_data_->elements.firstItem(); |
| 179 } | 175 } |
| 180 | 176 |
| 181 for (; !cur_node.isNull(); | 177 for (; !cur_node.isNull(); |
| 182 cur_node = cur_frame_data_->elements.nextItem()) { | 178 cur_node = cur_frame_data_->elements.nextItem()) { |
| 183 if (!cur_node.isElementNode()) { | 179 if (!cur_node.isElementNode()) { |
| 184 continue; | 180 continue; |
| 185 } | 181 } |
| 186 WebKit::WebElement element = cur_node.to<WebKit::WebElement>(); | 182 WebKit::WebElement element = cur_node.to<WebKit::WebElement>(); |
| 187 if (element.hasTagName("a")) { | 183 if (element.hasTagName("a")) { |
| (...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 374 Clear(); | 370 Clear(); |
| 375 } | 371 } |
| 376 | 372 |
| 377 void PhishingDOMFeatureExtractor::Clear() { | 373 void PhishingDOMFeatureExtractor::Clear() { |
| 378 features_ = NULL; | 374 features_ = NULL; |
| 379 done_callback_.reset(NULL); | 375 done_callback_.reset(NULL); |
| 380 cur_frame_data_.reset(NULL); | 376 cur_frame_data_.reset(NULL); |
| 381 cur_document_.reset(); | 377 cur_document_.reset(); |
| 382 } | 378 } |
| 383 | 379 |
| 384 bool PhishingDOMFeatureExtractor::ResetFrameData() { | 380 void PhishingDOMFeatureExtractor::ResetFrameData() { |
| 385 DCHECK(!cur_document_.isNull()); | 381 DCHECK(!cur_document_.isNull()); |
| 386 DCHECK(!cur_frame_data_.get()); | 382 DCHECK(!cur_frame_data_.get()); |
| 387 | 383 |
| 388 cur_frame_data_.reset(new FrameData()); | 384 cur_frame_data_.reset(new FrameData()); |
| 389 cur_frame_data_->elements = cur_document_.all(); | 385 cur_frame_data_->elements = cur_document_.all(); |
| 390 cur_frame_data_->domain = | 386 cur_frame_data_->domain = |
| 391 net::RegistryControlledDomainService::GetDomainAndRegistry( | 387 net::RegistryControlledDomainService::GetDomainAndRegistry( |
| 392 cur_document_.url()); | 388 cur_document_.url()); |
| 393 return true; | |
| 394 } | 389 } |
| 395 | 390 |
| 396 WebKit::WebDocument PhishingDOMFeatureExtractor::GetNextDocument() { | 391 WebKit::WebDocument PhishingDOMFeatureExtractor::GetNextDocument() { |
| 397 DCHECK(!cur_document_.isNull()); | 392 DCHECK(!cur_document_.isNull()); |
| 398 WebKit::WebFrame* frame = cur_document_.frame(); | 393 WebKit::WebFrame* frame = cur_document_.frame(); |
| 399 // Advance to the next frame that contains a document, with no wrapping. | 394 // Advance to the next frame that contains a document, with no wrapping. |
| 400 if (frame) { | 395 if (frame) { |
| 401 while ((frame = frame->traverseNext(false))) { | 396 while ((frame = frame->traverseNext(false))) { |
| 402 if (!frame->document().isNull()) { | 397 if (!frame->document().isNull()) { |
| 403 return frame->document(); | 398 return frame->document(); |
| (...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 493 // Record number of script tags (discretized for numerical stability.) | 488 // Record number of script tags (discretized for numerical stability.) |
| 494 if (page_feature_state_->num_script_tags > 1) { | 489 if (page_feature_state_->num_script_tags > 1) { |
| 495 features_->AddBooleanFeature(features::kPageNumScriptTagsGTOne); | 490 features_->AddBooleanFeature(features::kPageNumScriptTagsGTOne); |
| 496 if (page_feature_state_->num_script_tags > 6) { | 491 if (page_feature_state_->num_script_tags > 6) { |
| 497 features_->AddBooleanFeature(features::kPageNumScriptTagsGTSix); | 492 features_->AddBooleanFeature(features::kPageNumScriptTagsGTSix); |
| 498 } | 493 } |
| 499 } | 494 } |
| 500 } | 495 } |
| 501 | 496 |
| 502 } // namespace safe_browsing | 497 } // namespace safe_browsing |
| OLD | NEW |