| Index: components/dom_distiller/core/page_features.h
|
| diff --git a/components/dom_distiller/core/page_features.h b/components/dom_distiller/core/page_features.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..919a90a1e4d18857140c8c8d964e54d6567fbb98
|
| --- /dev/null
|
| +++ b/components/dom_distiller/core/page_features.h
|
| @@ -0,0 +1,38 @@
|
| +// Copyright 2015 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#ifndef COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_
|
| +#define COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_
|
| +
|
| +#include <vector>
|
| +
|
| +#include "base/values.h"
|
| +#include "url/gurl.h"
|
| +
|
| +namespace dom_distiller {
|
| +
|
| +// The distillable page detector is a model trained on a list of numeric
|
| +// features derived from more complex core features of a webpage (like the
|
| +// body's .textContent). This function derives those numeric features from one
|
| +// set of core features and returns them as a vector.
|
| +//
|
| +// Note: It is crucial that these features are derived in the same way and are
|
| +// in the same order as in the training pipeline. See //heuristics/distillable
|
| +// in the external DomDistillerJs repo.
|
| +std::vector<double> CalculateDerivedFeatures(bool isOGArticle,
|
| + const GURL& url,
|
| + double numElements,
|
| + double numAnchors,
|
| + double numForms,
|
| + const std::string& innerText,
|
| + const std::string& textContent,
|
| + const std::string& innerHTML);
|
| +
|
| +// Calculates the derived features from the JSON value returned by the
|
| +// JavaScript core feature extraction.
|
| +std::vector<double> CalculateDerivedFeaturesFromJSON(const base::Value* json);
|
| +
|
| +} // namespace dom_distiller
|
| +
|
| +#endif // COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_
|
|
|