| Index: components/dom_distiller/core/page_features.cc
 | 
| diff --git a/components/dom_distiller/core/page_features.cc b/components/dom_distiller/core/page_features.cc
 | 
| index f931bbe5cb23cdc75513005d00cac20c84b92b93..8425dc78ecd3fc57834f70ed0af4f663424d5503 100644
 | 
| --- a/components/dom_distiller/core/page_features.cc
 | 
| +++ b/components/dom_distiller/core/page_features.cc
 | 
| @@ -8,6 +8,7 @@
 | 
|  
 | 
|  #include "base/json/json_reader.h"
 | 
|  #include "third_party/re2/re2/re2.h"
 | 
| +#include "url/gurl.h"
 | 
|  
 | 
|  namespace dom_distiller {
 | 
|  /* This code needs to derive features in the same way and order in which they
 | 
| @@ -179,4 +180,64 @@ std::vector<double> CalculateDerivedFeaturesFromJSON(
 | 
|                                    innerHTML);
 | 
|  }
 | 
|  
 | 
| +std::vector<double> CalculateDerivedFeatures(  // Returns 22 derived page features, in the order pushed below.
 | 
| +    bool openGraph,  // Stored as the 'opengraph' feature.
 | 
| +    const GURL& url,  // Only url.path() is read.
 | 
| +    unsigned elementCount,  // 'elementCount' feature; also the 'anchorRatio' denominator.
 | 
| +    unsigned anchorCount,  // 'anchorCount' feature; also the 'anchorRatio' numerator.
 | 
| +    unsigned formCount,  // Stored as the 'formCount' feature.
 | 
| +    double mozScore,  // Appended unchanged.
 | 
| +    double mozScoreAllSqrt,  // Appended unchanged.
 | 
| +    double mozScoreAllLinear) {  // Appended unchanged.
 | 
| +  const std::string& path = url.path();  // Every URL-based feature below is computed from the path alone.
 | 
| +  std::vector<double> features;  // bool / unsigned / size_t values convert to double when pushed.
 | 
| +  // 'opengraph', opengraph,  (comment format: feature name, then a Python-style expression for its value; bool becomes 0.0 or 1.0)
 | 
| +  features.push_back(openGraph);
 | 
| +  // 'forum', 'forum' in path,  (Contains() is not defined in this hunk; presumably a substring test, needle first -- TODO confirm)
 | 
| +  features.push_back(Contains("forum", path));
 | 
| +  // 'index', 'index' in path,  (same Contains() check, keyword "index")
 | 
| +  features.push_back(Contains("index", path));
 | 
| +  // 'search', 'search' in path,  (same Contains() check, keyword "search")
 | 
| +  features.push_back(Contains("search", path));
 | 
| +  // 'view', 'view' in path,  (same Contains() check, keyword "view")
 | 
| +  features.push_back(Contains("view", path));
 | 
| +  // 'archive', 'archive' in path,  (same Contains() check, keyword "archive")
 | 
| +  features.push_back(Contains("archive", path));
 | 
| +  // 'asp', '.asp' in path,  (note the leading dot: the keyword is ".asp", checked anywhere in the path)
 | 
| +  features.push_back(Contains(".asp", path));
 | 
| +  // 'phpbb', 'phpbb' in path,  (same Contains() check, keyword "phpbb")
 | 
| +  features.push_back(Contains("phpbb", path));
 | 
| +  // 'php', path.endswith('.php'),  (EndsWith() is not defined in this hunk; presumably a suffix test, suffix first -- TODO confirm)
 | 
| +  features.push_back(EndsWith(".php", path));
 | 
| +  // 'pathLength', len(path),  (size of the path string, converted from size_t to double)
 | 
| +  features.push_back(path.size());
 | 
| +  // 'domain', len(path) < 2,  (1.0 when the path is shorter than two characters, e.g. "" or "/"; otherwise 0.0)
 | 
| +  features.push_back(path.size() < 2);
 | 
| +  // 'pathComponents', CountMatches(path, r'\/.'),  ("\\/." is the C++ spelling of that pattern: a '/' followed by one more character; CountMatches() presumably counts regex matches -- TODO confirm)
 | 
| +  features.push_back(CountMatches(path, "\\/."));
 | 
| +  // 'slugDetector', CountMatches(path, r'[^\w/]'),  (pattern: any single character that is neither a word character nor '/')
 | 
| +  features.push_back(CountMatches(path, "[^\\w/]"));
 | 
| +  // 'pathNumbers', CountMatches(path, r'\d+'),  (pattern: a run of one or more digits)
 | 
| +  features.push_back(CountMatches(path, "\\d+"));
 | 
| +  // 'lastSegmentLength', len(GetLastSegment(path)),  (GetLastSegment() is not defined in this hunk; presumably the text after the final '/' -- TODO confirm)
 | 
| +  features.push_back(GetLastSegment(path).size());
 | 
| +  // 'formCount', numForms,  (caller-supplied count, stored as a double)
 | 
| +  features.push_back(formCount);
 | 
| +  // 'anchorCount', numAnchors,  (caller-supplied count, stored as a double)
 | 
| +  features.push_back(anchorCount);
 | 
| +  // 'elementCount', numElements,  (caller-supplied count, stored as a double)
 | 
| +  features.push_back(elementCount);
 | 
| +  // 'anchorRatio', float(numAnchors) / max(1, numElements),  (denominator is clamped to at least 1, so elementCount == 0 cannot divide by zero)
 | 
| +  features.push_back(
 | 
| +      double(anchorCount) / std::max<double>(1, elementCount));
 | 
| +  // 'mozScore'  (caller-supplied value, appended unchanged)
 | 
| +  features.push_back(mozScore);
 | 
| +  // 'mozScoreAllSqrt'  (caller-supplied value, appended unchanged)
 | 
| +  features.push_back(mozScoreAllSqrt);
 | 
| +  // 'mozScoreAllLinear'  (caller-supplied value, appended unchanged; this is the 22nd and last feature)
 | 
| +  features.push_back(mozScoreAllLinear);
 | 
| +
 | 
| +  return features;
 | 
| +}
 | 
| +
 | 
|  }  // namespace dom_distiller
 | 
| 
 |