| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 // | |
| 5 // PhishingUrlFeatureExtractor handles computing URL-based features for | |
| 6 // the client-side phishing detection model. These include tokens in the | |
| 7 // host and path, features pertaining to host length, and IP addresses. | |
| 8 | |
| 9 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_URL_FEATURE_EXTRACTOR_H_ | |
| 10 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_URL_FEATURE_EXTRACTOR_H_ | |
| 11 | |
| 12 #include <stddef.h> | |
| 13 | |
| 14 #include <string> | |
| 15 #include <vector> | |
| 16 | |
| 17 #include "base/macros.h" | |
| 18 | |
| 19 class GURL; | |
| 20 | |
| 21 namespace safe_browsing { | |
| 22 class FeatureMap; | |
| 23 | |
| 24 class PhishingUrlFeatureExtractor { | |
| 25 public: | |
| 26 PhishingUrlFeatureExtractor(); | |
| 27 ~PhishingUrlFeatureExtractor(); | |
| 28 | |
| 29 // Extracts features for |url| into the given feature map. | |
| 30 // Returns true on success. | |
| 31 bool ExtractFeatures(const GURL& url, FeatureMap* features); | |
| 32 | |
| 33 private: | |
| 34 friend class PhishingUrlFeatureExtractorTest; | |
| 35 | |
| 36 static const size_t kMinPathComponentLength = 3; | |
| 37 | |
| 38 // Given a string, finds all substrings of consecutive alphanumeric | |
| 39 // characters of length >= kMinPathComponentLength and inserts them into | |
| 40 // tokens. | |
| 41 static void SplitStringIntoLongAlphanumTokens( | |
| 42 const std::string& full, | |
| 43 std::vector<std::string>* tokens); | |
| 44 | |
| 45 DISALLOW_COPY_AND_ASSIGN(PhishingUrlFeatureExtractor); | |
| 46 }; | |
| 47 | |
| 48 } // namespace safe_browsing | |
| 49 | |
| 50 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_URL_FEATURE_EXTRACTOR_H_ | |
| OLD | NEW |