| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" | |
| 6 | |
| 7 #include <string> | |
| 8 #include <vector> | |
| 9 #include "chrome/renderer/safe_browsing/features.h" | |
| 10 #include "chrome/renderer/safe_browsing/test_utils.h" | |
| 11 #include "testing/gmock/include/gmock/gmock.h" | |
| 12 #include "testing/gtest/include/gtest/gtest.h" | |
| 13 #include "url/gurl.h" | |
| 14 | |
| 15 using ::testing::ElementsAre; | |
| 16 | |
| 17 namespace safe_browsing { | |
| 18 | |
| 19 class PhishingUrlFeatureExtractorTest : public ::testing::Test { | |
| 20 protected: | |
| 21 PhishingUrlFeatureExtractor extractor_; | |
| 22 | |
| 23 void SplitStringIntoLongAlphanumTokens(const std::string& full, | |
| 24 std::vector<std::string>* tokens) { | |
| 25 PhishingUrlFeatureExtractor::SplitStringIntoLongAlphanumTokens(full, | |
| 26 tokens); | |
| 27 } | |
| 28 }; | |
| 29 | |
| 30 TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) { | |
| 31 std::string url = "http://123.0.0.1/mydocuments/a.file.html"; | |
| 32 FeatureMap expected_features; | |
| 33 expected_features.AddBooleanFeature(features::kUrlHostIsIpAddress); | |
| 34 expected_features.AddBooleanFeature(features::kUrlPathToken + | |
| 35 std::string("mydocuments")); | |
| 36 expected_features.AddBooleanFeature(features::kUrlPathToken + | |
| 37 std::string("file")); | |
| 38 expected_features.AddBooleanFeature(features::kUrlPathToken + | |
| 39 std::string("html")); | |
| 40 | |
| 41 FeatureMap features; | |
| 42 ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features)); | |
| 43 ExpectFeatureMapsAreEqual(features, expected_features); | |
| 44 | |
| 45 url = "http://www.www.cnn.co.uk/sports/sports/index.html?shouldnotappear"; | |
| 46 expected_features.Clear(); | |
| 47 expected_features.AddBooleanFeature(features::kUrlTldToken + | |
| 48 std::string("co.uk")); | |
| 49 expected_features.AddBooleanFeature(features::kUrlDomainToken + | |
| 50 std::string("cnn")); | |
| 51 expected_features.AddBooleanFeature(features::kUrlOtherHostToken + | |
| 52 std::string("www")); | |
| 53 expected_features.AddBooleanFeature(features::kUrlNumOtherHostTokensGTOne); | |
| 54 expected_features.AddBooleanFeature(features::kUrlPathToken + | |
| 55 std::string("sports")); | |
| 56 expected_features.AddBooleanFeature(features::kUrlPathToken + | |
| 57 std::string("index")); | |
| 58 expected_features.AddBooleanFeature(features::kUrlPathToken + | |
| 59 std::string("html")); | |
| 60 | |
| 61 features.Clear(); | |
| 62 ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features)); | |
| 63 ExpectFeatureMapsAreEqual(features, expected_features); | |
| 64 | |
| 65 url = "http://justadomain.com/"; | |
| 66 expected_features.Clear(); | |
| 67 expected_features.AddBooleanFeature(features::kUrlTldToken + | |
| 68 std::string("com")); | |
| 69 expected_features.AddBooleanFeature(features::kUrlDomainToken + | |
| 70 std::string("justadomain")); | |
| 71 | |
| 72 features.Clear(); | |
| 73 ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features)); | |
| 74 ExpectFeatureMapsAreEqual(features, expected_features); | |
| 75 | |
| 76 url = "http://witharef.com/#abc"; | |
| 77 expected_features.Clear(); | |
| 78 expected_features.AddBooleanFeature(features::kUrlTldToken + | |
| 79 std::string("com")); | |
| 80 expected_features.AddBooleanFeature(features::kUrlDomainToken + | |
| 81 std::string("witharef")); | |
| 82 | |
| 83 features.Clear(); | |
| 84 ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features)); | |
| 85 ExpectFeatureMapsAreEqual(features, expected_features); | |
| 86 | |
| 87 url = "http://...www..lotsodots....com./"; | |
| 88 expected_features.Clear(); | |
| 89 expected_features.AddBooleanFeature(features::kUrlTldToken + | |
| 90 std::string("com")); | |
| 91 expected_features.AddBooleanFeature(features::kUrlDomainToken + | |
| 92 std::string("lotsodots")); | |
| 93 expected_features.AddBooleanFeature(features::kUrlOtherHostToken + | |
| 94 std::string("www")); | |
| 95 | |
| 96 features.Clear(); | |
| 97 ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features)); | |
| 98 ExpectFeatureMapsAreEqual(features, expected_features); | |
| 99 | |
| 100 url = "http://unrecognized.tld/"; | |
| 101 EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features)); | |
| 102 | |
| 103 url = "http://com/123"; | |
| 104 EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features)); | |
| 105 | |
| 106 url = "http://.co.uk/"; | |
| 107 EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features)); | |
| 108 | |
| 109 url = "file:///nohost.txt"; | |
| 110 EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features)); | |
| 111 | |
| 112 url = "not:valid:at:all"; | |
| 113 EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features)); | |
| 114 } | |
| 115 | |
| 116 TEST_F(PhishingUrlFeatureExtractorTest, SplitStringIntoLongAlphanumTokens) { | |
| 117 std::string full = "This.is/a_pretty\\unusual-!path,indeed"; | |
| 118 std::vector<std::string> long_tokens; | |
| 119 SplitStringIntoLongAlphanumTokens(full, &long_tokens); | |
| 120 EXPECT_THAT(long_tokens, | |
| 121 ElementsAre("This", "pretty", "unusual", "path", "indeed")); | |
| 122 | |
| 123 long_tokens.clear(); | |
| 124 full = "...i-am_re/al&ly\\b,r,o|k=e:n///up%20"; | |
| 125 SplitStringIntoLongAlphanumTokens(full, &long_tokens); | |
| 126 EXPECT_THAT(long_tokens, ElementsAre()); | |
| 127 } | |
| 128 | |
| 129 } // namespace safe_browsing | |
| OLD | NEW |