OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ | 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ |
6 #define COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ | 6 #define COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ |
7 | 7 |
8 #include <vector> | 8 #include <vector> |
9 | 9 |
10 #include "base/values.h" | 10 #include "base/values.h" |
11 #include "url/gurl.h" | 11 #include "url/gurl.h" |
12 | 12 |
| 13 class GURL; |
| 14 |
13 namespace dom_distiller { | 15 namespace dom_distiller { |
14 | 16 |
15 // The length of the derived features vector. | 17 // The length of the derived features vector. |
16 extern int kDerivedFeaturesCount; | 18 extern int kDerivedFeaturesCount; |
17 | 19 |
18 // The distillable page detector is a model trained on a list of numeric | 20 // The distillable page detector is a model trained on a list of numeric |
19 // features derived from core more complex features of a webpage (like the | 21 // features derived from features of a webpage (like the body's number of |
20 // body's .textContent). This derives the numeric features for a set of core | 22 // elements). This derives the numeric features from a set of core features. |
21 // features. | |
22 // | 23 // |
23 // Note: It is crucial that these features are derived in the same way and are | 24 // Note: It is crucial that these features are derived in the same way and are |
24 // in the same order as in the training pipeline. See //heuristics/distillable | 25 // in the same order as in the training pipeline. See //heuristics/distillable |
25 // in the external DomDistillerJs repo. | 26 // in the external DomDistiller repo. |
26 std::vector<double> CalculateDerivedFeatures(bool isOGArticle, | 27 std::vector<double> CalculateDerivedFeatures(bool isOGArticle, |
27 const GURL& url, | 28 const GURL& url, |
28 double numElements, | 29 double numElements, |
29 double numAnchors, | 30 double numAnchors, |
30 double numForms, | 31 double numForms, |
31 const std::string& innerText, | 32 const std::string& innerText, |
32 const std::string& textContent, | 33 const std::string& textContent, |
33 const std::string& innerHTML); | 34 const std::string& innerHTML); |
34 | 35 |
35 // Calculates the derived features from the JSON value as returned by the | 36 // Calculates the derived features from the JSON value as returned by the |
36 // javascript core feature extraction. | 37 // javascript core feature extraction. |
37 std::vector<double> CalculateDerivedFeaturesFromJSON( | 38 std::vector<double> CalculateDerivedFeaturesFromJSON( |
38 const base::Value* stringified_json); | 39 const base::Value* stringified_json); |
39 | 40 |
| 41 std::vector<double> CalculateDerivedFeatures( |
| 42 bool openGraph, |
| 43 const GURL& url, |
| 44 unsigned elementCount, |
| 45 unsigned anchorCount, |
| 46 unsigned formCount, |
| 47 double mozScore, |
| 48 double mozScoreAllSqrt, |
| 49 double mozScoreAllLinear); |
| 50 |
40 } // namespace dom_distiller | 51 } // namespace dom_distiller |
41 | 52 |
42 #endif // COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ | 53 #endif // COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ |
OLD | NEW |