| Index: components/dom_distiller/core/page_features_unittest.cc
 | 
| diff --git a/components/dom_distiller/core/page_features_unittest.cc b/components/dom_distiller/core/page_features_unittest.cc
 | 
| index 8e259e4382bb39a998cfe82e692d03233792c1c6..7a62af1859e9d1763100e24487546ebf04a68dda 100644
 | 
| --- a/components/dom_distiller/core/page_features_unittest.cc
 | 
| +++ b/components/dom_distiller/core/page_features_unittest.cc
 | 
| @@ -13,86 +13,83 @@
 | 
|  #include "base/memory/scoped_ptr.h"
 | 
|  #include "base/path_service.h"
 | 
|  #include "testing/gtest/include/gtest/gtest.h"
 | 
| +#include "third_party/WebKit/public/platform/WebDistillability.h"
 | 
| +#include "url/gurl.h"
 | 
|  
 | 
|  namespace dom_distiller {
 | 
|  
 | 
|  // This test uses input data of core features and the output of the training
 | 
|  // pipeline's derived feature extraction to ensure that the extraction that is
 | 
|  // done in Chromium matches that in the training pipeline.
 | 
| -TEST(DomDistillerPageFeaturesTest, TestCalculateDerivedFeatures) {
 | 
| -  base::FilePath dir_source_root;
 | 
| -  EXPECT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &dir_source_root));
 | 
| -  std::string input_data;
 | 
| -  ASSERT_TRUE(base::ReadFileToString(
 | 
| -      dir_source_root.AppendASCII(
 | 
| -          "components/test/data/dom_distiller/core_features.json"),
 | 
| -      &input_data));
 | 
| -  std::string expected_output_data;
 | 
| -  // This file contains the output from the calculation of derived features in
 | 
| -  // the training pipeline.
 | 
| -  ASSERT_TRUE(base::ReadFileToString(
 | 
| -      dir_source_root.AppendASCII(
 | 
| -          "components/test/data/dom_distiller/derived_features.json"),
 | 
| -      &expected_output_data));
 | 
| -
 | 
| -  scoped_ptr<base::Value> input_json = base::JSONReader::Read(input_data);
 | 
| -  ASSERT_TRUE(input_json);
 | 
| -
 | 
| -  scoped_ptr<base::Value> expected_output_json =
 | 
| -      base::JSONReader::Read(expected_output_data);
 | 
| -  ASSERT_TRUE(expected_output_json);
 | 
| -
 | 
| -  base::ListValue* input_entries;
 | 
| -  ASSERT_TRUE(input_json->GetAsList(&input_entries));
 | 
| -  ASSERT_GT(input_entries->GetSize(), 0u);
 | 
| -
 | 
| -  base::ListValue* expected_output_entries;
 | 
| -  ASSERT_TRUE(expected_output_json->GetAsList(&expected_output_entries));
 | 
| -  ASSERT_EQ(expected_output_entries->GetSize(), input_entries->GetSize());
 | 
| -
 | 
| -  // In the output, the features list is a sequence of labels followed by values
 | 
| -  // (so labels at even indices, values at odd indices).
 | 
| -  base::DictionaryValue* entry;
 | 
| -  base::ListValue* derived_features;
 | 
| -  ASSERT_TRUE(expected_output_entries->GetDictionary(0, &entry));
 | 
| -  ASSERT_TRUE(entry->GetList("features", &derived_features));
 | 
| -  std::vector<std::string> labels;
 | 
| -  for (size_t i = 0; i < derived_features->GetSize(); i += 2) {
 | 
| -    std::string label;
 | 
| -    ASSERT_TRUE(derived_features->GetString(i, &label));
 | 
| -    labels.push_back(label);
 | 
| -  }
 | 
| -
 | 
| -  for (size_t i = 0; i < input_entries->GetSize(); ++i) {
 | 
| -    base::DictionaryValue* core_features;
 | 
| -    ASSERT_TRUE(input_entries->GetDictionary(i, &entry));
 | 
| -    ASSERT_TRUE(entry->GetDictionary("features", &core_features));
 | 
| -    // CalculateDerivedFeaturesFromJSON expects a base::Value of the stringified
 | 
| -    // JSON (and not a base::Value of the JSON itself)
 | 
| -    std::string stringified_json;
 | 
| -    ASSERT_TRUE(base::JSONWriter::Write(*core_features, &stringified_json));
 | 
| -    scoped_ptr<base::Value> stringified_value(
 | 
| -        new base::StringValue(stringified_json));
 | 
| -    std::vector<double> derived(
 | 
| -        CalculateDerivedFeaturesFromJSON(stringified_value.get()));
 | 
| -
 | 
| -    ASSERT_EQ(labels.size(), derived.size());
 | 
| -    ASSERT_TRUE(expected_output_entries->GetDictionary(i, &entry));
 | 
| -    ASSERT_TRUE(entry->GetList("features", &derived_features));
 | 
| -    std::string entry_url;
 | 
| -    ASSERT_TRUE(entry->GetString("url", &entry_url));
 | 
| -    for (size_t j = 0, value_index = 1; j < derived.size();
 | 
| -         ++j, value_index += 2) {
 | 
| -      double expected_value;
 | 
| -      if (!derived_features->GetDouble(value_index, &expected_value)) {
 | 
| -        bool bool_value;
 | 
| -        ASSERT_TRUE(derived_features->GetBoolean(value_index, &bool_value));
 | 
| -        expected_value = bool_value ? 1.0 : 0.0;
 | 
| -      }
 | 
| -      EXPECT_DOUBLE_EQ(derived[j], expected_value)
 | 
| -          << "incorrect value for entry with url " << entry_url
 | 
| -          << " for derived feature " << labels[j];
 | 
| -    }
 | 
| -  }
 | 
| +
 | 
| +TEST(DomDistillerPageFeaturesTest, TestPath) {
 | 
| +  blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures();
 | 
| +
 | 
| +  GURL url("http://example.com/search/view/index/the-title-of-archive.php");
 | 
| +
 | 
| +  std::vector<double> derived(CalculateDerivedFeatures(f, url));
 | 
| +  ASSERT_EQ(kDerivedFeaturesCount, derived.size());  // fatal: indexed below
 | 
| +
 | 
| +  EXPECT_EQ(0, lround(derived[1]));  // NOTE: derived[0] is not checked
 | 
| +  EXPECT_EQ(1, lround(derived[2]));
 | 
| +  EXPECT_EQ(1, lround(derived[3]));
 | 
| +  EXPECT_EQ(1, lround(derived[4]));
 | 
| +  EXPECT_EQ(1, lround(derived[5]));
 | 
| +  EXPECT_EQ(0, lround(derived[6]));
 | 
| +  EXPECT_EQ(0, lround(derived[7]));
 | 
| +  EXPECT_EQ(1, lround(derived[8]));
 | 
| +  EXPECT_EQ(43, lround(derived[9]));  // presumably path length (43 chars)
 | 
| +  EXPECT_EQ(0, lround(derived[10]));
 | 
| +  EXPECT_EQ(4, lround(derived[11]));  // presumably '/'-separated segments
 | 
| +  EXPECT_EQ(4, lround(derived[12]));
 | 
| +  EXPECT_EQ(0, lround(derived[13]));
 | 
| +  EXPECT_EQ(24, lround(derived[14]));  // presumably last segment length
 | 
|  }
 | 
| +
 | 
| +TEST(DomDistillerPageFeaturesTest, TestPath2) {
 | 
| +  blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures();
 | 
| +
 | 
| +  GURL url("http://example.com/phpbb/forum123/456.asp");
 | 
| +
 | 
| +  std::vector<double> derived(CalculateDerivedFeatures(f, url));
 | 
| +  ASSERT_EQ(kDerivedFeaturesCount, derived.size());  // fatal: indexed below
 | 
| +  EXPECT_EQ(1, lround(derived[1]));  // NOTE: derived[0] is not checked
 | 
| +  EXPECT_EQ(0, lround(derived[2]));
 | 
| +  EXPECT_EQ(0, lround(derived[3]));
 | 
| +  EXPECT_EQ(0, lround(derived[4]));
 | 
| +  EXPECT_EQ(0, lround(derived[5]));
 | 
| +  EXPECT_EQ(1, lround(derived[6]));
 | 
| +  EXPECT_EQ(1, lround(derived[7]));
 | 
| +  EXPECT_EQ(0, lround(derived[8]));
 | 
| +  EXPECT_EQ(23, lround(derived[9]));  // presumably path length (23 chars)
 | 
| +  EXPECT_EQ(0, lround(derived[10]));
 | 
| +  EXPECT_EQ(3, lround(derived[11]));  // presumably '/'-separated segments
 | 
| +  EXPECT_EQ(1, lround(derived[12]));
 | 
| +  EXPECT_EQ(2, lround(derived[13]));  // presumably digit runs: 123, 456
 | 
| +  EXPECT_EQ(7, lround(derived[14]));  // presumably last segment length
 | 
| +}
 | 
| +
 | 
| +TEST(DomDistillerPageFeaturesTest, TestPath3) {
 | 
| +  blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures();
 | 
| +
 | 
| +  GURL url("https://example.com/");
 | 
| +
 | 
| +  std::vector<double> derived(CalculateDerivedFeatures(f, url));
 | 
| +  ASSERT_EQ(kDerivedFeaturesCount, derived.size());  // fatal: indexed below
 | 
| +  EXPECT_EQ(0, lround(derived[1]));  // NOTE: derived[0] is not checked
 | 
| +  EXPECT_EQ(0, lround(derived[2]));
 | 
| +  EXPECT_EQ(0, lround(derived[3]));
 | 
| +  EXPECT_EQ(0, lround(derived[4]));
 | 
| +  EXPECT_EQ(0, lround(derived[5]));
 | 
| +  EXPECT_EQ(0, lround(derived[6]));
 | 
| +  EXPECT_EQ(0, lround(derived[7]));
 | 
| +  EXPECT_EQ(0, lround(derived[8]));
 | 
| +  EXPECT_EQ(1, lround(derived[9]));  // presumably path length ("/")
 | 
| +  EXPECT_EQ(1, lround(derived[10]));  // presumably a root-path flag
 | 
| +  EXPECT_EQ(0, lround(derived[11]));
 | 
| +  EXPECT_EQ(0, lround(derived[12]));
 | 
| +  EXPECT_EQ(0, lround(derived[13]));
 | 
| +  EXPECT_EQ(0, lround(derived[14]));
 | 
| +}
 | 
| +
 | 
|  }
 | 
| 
 |