| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ | 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ |
| 6 #define COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ | 6 #define COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ |
| 7 | 7 |
| 8 #include <vector> | 8 #include <vector> |
| 9 | 9 |
| 10 #include "base/values.h" | 10 class GURL; |
| 11 #include "url/gurl.h" | 11 |
| 12 namespace blink { |
| 13 struct WebDistillabilityFeatures; |
| 14 } |
| 12 | 15 |
| 13 namespace dom_distiller { | 16 namespace dom_distiller { |
| 14 | 17 |
| 15 // The length of the derived features vector. | 18 // The length of the derived features vector. |
| 16 extern int kDerivedFeaturesCount; | 19 extern unsigned kDerivedFeaturesCount; |
| 17 | 20 |
| 18 // The distillable page detector is a model trained on a list of numeric | 21 // The distillable page detector is a model trained on a list of numeric |
| 19 // features derived from core more complex features of a webpage (like the | 22 // features derived from features of a webpage (like the body's number of |
| 20 // body's .textContent). This derives the numeric features for a set of core | 23 // elements). This derives the numeric features from a set of core features. |
| 21 // features. | |
| 22 // | 24 // |
| 23 // Note: It is crucial that these features are derived in the same way and are | 25 // Note: It is crucial that these features are derived in the same way and are |
| 24 // in the same order as in the training pipeline. See //heuristics/distillable | 26 // in the same order as in the training pipeline. See //heuristics/distillable |
| 25 // in the external DomDistillerJs repo. | 27 // in the external DomDistiller repo. |
| 26 std::vector<double> CalculateDerivedFeatures(bool isOGArticle, | 28 std::vector<double> CalculateDerivedFeatures( |
| 27 const GURL& url, | 29 const blink::WebDistillabilityFeatures& f, |
| 28 double numElements, | 30 const GURL& url); |
| 29 double numAnchors, | |
| 30 double numForms, | |
| 31 const std::string& innerText, | |
| 32 const std::string& textContent, | |
| 33 const std::string& innerHTML); | |
| 34 | |
| 35 // Calculates the derived features from the JSON value as returned by the | |
| 36 // javascript core feature extraction. | |
| 37 std::vector<double> CalculateDerivedFeaturesFromJSON( | |
| 38 const base::Value* stringified_json); | |
| 39 | 31 |
| 40 } // namespace dom_distiller | 32 } // namespace dom_distiller |
| 41 | 33 |
| 42 #endif // COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ | 34 #endif // COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ |
| OLD | NEW |