| Index: components/dom_distiller/core/page_features_unittest.cc
|
| diff --git a/components/dom_distiller/core/page_features_unittest.cc b/components/dom_distiller/core/page_features_unittest.cc
|
| index 8e259e4382bb39a998cfe82e692d03233792c1c6..7a62af1859e9d1763100e24487546ebf04a68dda 100644
|
| --- a/components/dom_distiller/core/page_features_unittest.cc
|
| +++ b/components/dom_distiller/core/page_features_unittest.cc
|
| @@ -13,86 +13,83 @@
|
| #include "base/memory/scoped_ptr.h"
|
| #include "base/path_service.h"
|
| #include "testing/gtest/include/gtest/gtest.h"
|
| +#include "third_party/WebKit/public/platform/WebDistillability.h"
|
| +#include "url/gurl.h"
|
|
|
| namespace dom_distiller {
|
|
|
| // This test uses input data of core features and the output of the training
|
| // pipeline's derived feature extraction to ensure that the extraction that is
|
| // done in Chromium matches that in the training pipeline.
|
| -TEST(DomDistillerPageFeaturesTest, TestCalculateDerivedFeatures) {
|
| - base::FilePath dir_source_root;
|
| - EXPECT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &dir_source_root));
|
| - std::string input_data;
|
| - ASSERT_TRUE(base::ReadFileToString(
|
| - dir_source_root.AppendASCII(
|
| - "components/test/data/dom_distiller/core_features.json"),
|
| - &input_data));
|
| - std::string expected_output_data;
|
| - // This file contains the output from the calculation of derived features in
|
| - // the training pipeline.
|
| - ASSERT_TRUE(base::ReadFileToString(
|
| - dir_source_root.AppendASCII(
|
| - "components/test/data/dom_distiller/derived_features.json"),
|
| - &expected_output_data));
|
| -
|
| - scoped_ptr<base::Value> input_json = base::JSONReader::Read(input_data);
|
| - ASSERT_TRUE(input_json);
|
| -
|
| - scoped_ptr<base::Value> expected_output_json =
|
| - base::JSONReader::Read(expected_output_data);
|
| - ASSERT_TRUE(expected_output_json);
|
| -
|
| - base::ListValue* input_entries;
|
| - ASSERT_TRUE(input_json->GetAsList(&input_entries));
|
| - ASSERT_GT(input_entries->GetSize(), 0u);
|
| -
|
| - base::ListValue* expected_output_entries;
|
| - ASSERT_TRUE(expected_output_json->GetAsList(&expected_output_entries));
|
| - ASSERT_EQ(expected_output_entries->GetSize(), input_entries->GetSize());
|
| -
|
| - // In the output, the features list is a sequence of labels followed by values
|
| - // (so labels at even indices, values at odd indices).
|
| - base::DictionaryValue* entry;
|
| - base::ListValue* derived_features;
|
| - ASSERT_TRUE(expected_output_entries->GetDictionary(0, &entry));
|
| - ASSERT_TRUE(entry->GetList("features", &derived_features));
|
| - std::vector<std::string> labels;
|
| - for (size_t i = 0; i < derived_features->GetSize(); i += 2) {
|
| - std::string label;
|
| - ASSERT_TRUE(derived_features->GetString(i, &label));
|
| - labels.push_back(label);
|
| - }
|
| -
|
| - for (size_t i = 0; i < input_entries->GetSize(); ++i) {
|
| - base::DictionaryValue* core_features;
|
| - ASSERT_TRUE(input_entries->GetDictionary(i, &entry));
|
| - ASSERT_TRUE(entry->GetDictionary("features", &core_features));
|
| - // CalculateDerivedFeaturesFromJSON expects a base::Value of the stringified
|
| - // JSON (and not a base::Value of the JSON itself)
|
| - std::string stringified_json;
|
| - ASSERT_TRUE(base::JSONWriter::Write(*core_features, &stringified_json));
|
| - scoped_ptr<base::Value> stringified_value(
|
| - new base::StringValue(stringified_json));
|
| - std::vector<double> derived(
|
| - CalculateDerivedFeaturesFromJSON(stringified_value.get()));
|
| -
|
| - ASSERT_EQ(labels.size(), derived.size());
|
| - ASSERT_TRUE(expected_output_entries->GetDictionary(i, &entry));
|
| - ASSERT_TRUE(entry->GetList("features", &derived_features));
|
| - std::string entry_url;
|
| - ASSERT_TRUE(entry->GetString("url", &entry_url));
|
| - for (size_t j = 0, value_index = 1; j < derived.size();
|
| - ++j, value_index += 2) {
|
| - double expected_value;
|
| - if (!derived_features->GetDouble(value_index, &expected_value)) {
|
| - bool bool_value;
|
| - ASSERT_TRUE(derived_features->GetBoolean(value_index, &bool_value));
|
| - expected_value = bool_value ? 1.0 : 0.0;
|
| - }
|
| - EXPECT_DOUBLE_EQ(derived[j], expected_value)
|
| - << "incorrect value for entry with url " << entry_url
|
| - << " for derived feature " << labels[j];
|
| - }
|
| - }
|
| +
|
| +TEST(DomDistillerPageFeaturesTest, TestPath) {
|
| + blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures();
|
| + // Checks derived[1..14] for a multi-segment .php URL with untouched |f|.
|
| + GURL url("http://example.com/search/view/index/the-title-of-archive.php");
|
| + // NOTE(review): the comment block above still describes the removed JSON test.
|
| + std::vector<double> derived(CalculateDerivedFeatures(f, url));
|
| + ASSERT_EQ(kDerivedFeaturesCount, derived.size()); // fatal: guards derived[i]
|
| +
|
| + EXPECT_EQ(0, lround(derived[1]));
|
| + EXPECT_EQ(1, lround(derived[2]));
|
| + EXPECT_EQ(1, lround(derived[3]));
|
| + EXPECT_EQ(1, lround(derived[4]));
|
| + EXPECT_EQ(1, lround(derived[5]));
|
| + EXPECT_EQ(0, lround(derived[6]));
|
| + EXPECT_EQ(0, lround(derived[7]));
|
| + EXPECT_EQ(1, lround(derived[8]));
|
| + EXPECT_EQ(43, lround(derived[9])); // matches the path's length in chars
|
| + EXPECT_EQ(0, lround(derived[10]));
|
| + EXPECT_EQ(4, lround(derived[11])); // matches the path's segment count
|
| + EXPECT_EQ(4, lround(derived[12]));
|
| + EXPECT_EQ(0, lround(derived[13]));
|
| + EXPECT_EQ(24, lround(derived[14])); // matches the last segment's length
|
| }
|
| +
|
| +TEST(DomDistillerPageFeaturesTest, TestPath2) {
|
| + blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures();
|
| + // Checks derived[1..14] for a three-segment .asp URL with untouched |f|.
|
| + GURL url("http://example.com/phpbb/forum123/456.asp");
|
| +
|
| + std::vector<double> derived(CalculateDerivedFeatures(f, url));
|
| + ASSERT_EQ(kDerivedFeaturesCount, derived.size()); // fatal: guards derived[i]
|
| + EXPECT_EQ(1, lround(derived[1]));
|
| + EXPECT_EQ(0, lround(derived[2]));
|
| + EXPECT_EQ(0, lround(derived[3]));
|
| + EXPECT_EQ(0, lround(derived[4]));
|
| + EXPECT_EQ(0, lround(derived[5]));
|
| + EXPECT_EQ(1, lround(derived[6]));
|
| + EXPECT_EQ(1, lround(derived[7]));
|
| + EXPECT_EQ(0, lround(derived[8]));
|
| + EXPECT_EQ(23, lround(derived[9])); // matches the path's length in chars
|
| + EXPECT_EQ(0, lround(derived[10]));
|
| + EXPECT_EQ(3, lround(derived[11])); // matches the path's segment count
|
| + EXPECT_EQ(1, lround(derived[12]));
|
| + EXPECT_EQ(2, lround(derived[13]));
|
| + EXPECT_EQ(7, lround(derived[14])); // matches the last segment's length
|
| +}
|
| +
|
| +TEST(DomDistillerPageFeaturesTest, TestPath3) {
|
| + blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures();
|
| + // Checks derived[1..14] for a URL whose path is just "/", untouched |f|.
|
| + GURL url("https://example.com/");
|
| +
|
| + std::vector<double> derived(CalculateDerivedFeatures(f, url));
|
| + ASSERT_EQ(kDerivedFeaturesCount, derived.size()); // fatal: guards derived[i]
|
| + EXPECT_EQ(0, lround(derived[1]));
|
| + EXPECT_EQ(0, lround(derived[2]));
|
| + EXPECT_EQ(0, lround(derived[3]));
|
| + EXPECT_EQ(0, lround(derived[4]));
|
| + EXPECT_EQ(0, lround(derived[5]));
|
| + EXPECT_EQ(0, lround(derived[6]));
|
| + EXPECT_EQ(0, lround(derived[7]));
|
| + EXPECT_EQ(0, lround(derived[8]));
|
| + EXPECT_EQ(1, lround(derived[9])); // matches the length of the path "/"
|
| + EXPECT_EQ(1, lround(derived[10])); // presumably "path is the root" - confirm
|
| + EXPECT_EQ(0, lround(derived[11]));
|
| + EXPECT_EQ(0, lround(derived[12]));
|
| + EXPECT_EQ(0, lround(derived[13]));
|
| + EXPECT_EQ(0, lround(derived[14]));
|
| +}
|
| +
|
| }
|
|
|