Index: components/dom_distiller/core/page_features_unittest.cc |
diff --git a/components/dom_distiller/core/page_features_unittest.cc b/components/dom_distiller/core/page_features_unittest.cc |
index 8e259e4382bb39a998cfe82e692d03233792c1c6..7a62af1859e9d1763100e24487546ebf04a68dda 100644 |
--- a/components/dom_distiller/core/page_features_unittest.cc |
+++ b/components/dom_distiller/core/page_features_unittest.cc |
@@ -13,86 +13,83 @@ |
#include "base/memory/scoped_ptr.h" |
#include "base/path_service.h" |
#include "testing/gtest/include/gtest/gtest.h" |
+#include "third_party/WebKit/public/platform/WebDistillability.h" |
+#include "url/gurl.h" |
namespace dom_distiller { |
// This test uses input data of core features and the output of the training |
// pipeline's derived feature extraction to ensure that the extraction that is |
// done in Chromium matches that in the training pipeline. |
-TEST(DomDistillerPageFeaturesTest, TestCalculateDerivedFeatures) { |
- base::FilePath dir_source_root; |
- EXPECT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &dir_source_root)); |
- std::string input_data; |
- ASSERT_TRUE(base::ReadFileToString( |
- dir_source_root.AppendASCII( |
- "components/test/data/dom_distiller/core_features.json"), |
- &input_data)); |
- std::string expected_output_data; |
- // This file contains the output from the calculation of derived features in |
- // the training pipeline. |
- ASSERT_TRUE(base::ReadFileToString( |
- dir_source_root.AppendASCII( |
- "components/test/data/dom_distiller/derived_features.json"), |
- &expected_output_data)); |
- |
- scoped_ptr<base::Value> input_json = base::JSONReader::Read(input_data); |
- ASSERT_TRUE(input_json); |
- |
- scoped_ptr<base::Value> expected_output_json = |
- base::JSONReader::Read(expected_output_data); |
- ASSERT_TRUE(expected_output_json); |
- |
- base::ListValue* input_entries; |
- ASSERT_TRUE(input_json->GetAsList(&input_entries)); |
- ASSERT_GT(input_entries->GetSize(), 0u); |
- |
- base::ListValue* expected_output_entries; |
- ASSERT_TRUE(expected_output_json->GetAsList(&expected_output_entries)); |
- ASSERT_EQ(expected_output_entries->GetSize(), input_entries->GetSize()); |
- |
- // In the output, the features list is a sequence of labels followed by values |
- // (so labels at even indices, values at odd indices). |
- base::DictionaryValue* entry; |
- base::ListValue* derived_features; |
- ASSERT_TRUE(expected_output_entries->GetDictionary(0, &entry)); |
- ASSERT_TRUE(entry->GetList("features", &derived_features)); |
- std::vector<std::string> labels; |
- for (size_t i = 0; i < derived_features->GetSize(); i += 2) { |
- std::string label; |
- ASSERT_TRUE(derived_features->GetString(i, &label)); |
- labels.push_back(label); |
- } |
- |
- for (size_t i = 0; i < input_entries->GetSize(); ++i) { |
- base::DictionaryValue* core_features; |
- ASSERT_TRUE(input_entries->GetDictionary(i, &entry)); |
- ASSERT_TRUE(entry->GetDictionary("features", &core_features)); |
- // CalculateDerivedFeaturesFromJSON expects a base::Value of the stringified |
- // JSON (and not a base::Value of the JSON itself) |
- std::string stringified_json; |
- ASSERT_TRUE(base::JSONWriter::Write(*core_features, &stringified_json)); |
- scoped_ptr<base::Value> stringified_value( |
- new base::StringValue(stringified_json)); |
- std::vector<double> derived( |
- CalculateDerivedFeaturesFromJSON(stringified_value.get())); |
- |
- ASSERT_EQ(labels.size(), derived.size()); |
- ASSERT_TRUE(expected_output_entries->GetDictionary(i, &entry)); |
- ASSERT_TRUE(entry->GetList("features", &derived_features)); |
- std::string entry_url; |
- ASSERT_TRUE(entry->GetString("url", &entry_url)); |
- for (size_t j = 0, value_index = 1; j < derived.size(); |
- ++j, value_index += 2) { |
- double expected_value; |
- if (!derived_features->GetDouble(value_index, &expected_value)) { |
- bool bool_value; |
- ASSERT_TRUE(derived_features->GetBoolean(value_index, &bool_value)); |
- expected_value = bool_value ? 1.0 : 0.0; |
- } |
- EXPECT_DOUBLE_EQ(derived[j], expected_value) |
- << "incorrect value for entry with url " << entry_url |
- << " for derived feature " << labels[j]; |
- } |
- } |
+ |
+TEST(DomDistillerPageFeaturesTest, TestPath) { |
+ // Checks rounded derived[1..14] for a URL whose path has four segments. |
+ blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures(); |
+ GURL url("http://example.com/search/view/index/the-title-of-archive.php"); |
+ std::vector<double> derived(CalculateDerivedFeatures(f, url)); |
+ // ASSERT (fatal) rather than EXPECT: on a size mismatch the indexed reads |
+ // below could run past the end of the vector. derived[0] is not checked. |
+ ASSERT_EQ(kDerivedFeaturesCount, derived.size()); |
+ EXPECT_EQ(0, lround(derived[1])); |
+ EXPECT_EQ(1, lround(derived[2])); |
+ EXPECT_EQ(1, lround(derived[3])); |
+ EXPECT_EQ(1, lround(derived[4])); |
+ EXPECT_EQ(1, lround(derived[5])); |
+ EXPECT_EQ(0, lround(derived[6])); |
+ EXPECT_EQ(0, lround(derived[7])); |
+ EXPECT_EQ(1, lround(derived[8])); |
+ EXPECT_EQ(43, lround(derived[9])); |
+ EXPECT_EQ(0, lround(derived[10])); |
+ EXPECT_EQ(4, lround(derived[11])); |
+ EXPECT_EQ(4, lround(derived[12])); |
+ EXPECT_EQ(0, lround(derived[13])); |
+ EXPECT_EQ(24, lround(derived[14])); |
} |
+ |
+TEST(DomDistillerPageFeaturesTest, TestPath2) { |
+ // Checks rounded derived[1..14] for a three-segment path ending in ".asp". |
+ blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures(); |
+ GURL url("http://example.com/phpbb/forum123/456.asp"); |
+ std::vector<double> derived(CalculateDerivedFeatures(f, url)); |
+ // Fatal on size mismatch so the derived[...] reads below stay in bounds. |
+ ASSERT_EQ(kDerivedFeaturesCount, derived.size()); |
+ EXPECT_EQ(1, lround(derived[1])); |
+ EXPECT_EQ(0, lround(derived[2])); |
+ EXPECT_EQ(0, lround(derived[3])); |
+ EXPECT_EQ(0, lround(derived[4])); |
+ EXPECT_EQ(0, lround(derived[5])); |
+ EXPECT_EQ(1, lround(derived[6])); |
+ EXPECT_EQ(1, lround(derived[7])); |
+ EXPECT_EQ(0, lround(derived[8])); |
+ EXPECT_EQ(23, lround(derived[9])); |
+ EXPECT_EQ(0, lround(derived[10])); |
+ EXPECT_EQ(3, lround(derived[11])); |
+ EXPECT_EQ(1, lround(derived[12])); |
+ EXPECT_EQ(2, lround(derived[13])); |
+ EXPECT_EQ(7, lround(derived[14])); |
+} |
+ |
+TEST(DomDistillerPageFeaturesTest, TestPath3) { |
+ // Checks rounded derived[1..14] for an https URL whose path is just "/". |
+ blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures(); |
+ GURL url("https://example.com/"); |
+ std::vector<double> derived(CalculateDerivedFeatures(f, url)); |
+ // Fatal on size mismatch so the derived[...] reads below stay in bounds. |
+ ASSERT_EQ(kDerivedFeaturesCount, derived.size()); |
+ EXPECT_EQ(0, lround(derived[1])); |
+ EXPECT_EQ(0, lround(derived[2])); |
+ EXPECT_EQ(0, lround(derived[3])); |
+ EXPECT_EQ(0, lround(derived[4])); |
+ EXPECT_EQ(0, lround(derived[5])); |
+ EXPECT_EQ(0, lround(derived[6])); |
+ EXPECT_EQ(0, lround(derived[7])); |
+ EXPECT_EQ(0, lround(derived[8])); |
+ EXPECT_EQ(1, lround(derived[9])); |
+ EXPECT_EQ(1, lround(derived[10])); |
+ EXPECT_EQ(0, lround(derived[11])); |
+ EXPECT_EQ(0, lround(derived[12])); |
+ EXPECT_EQ(0, lround(derived[13])); |
+ EXPECT_EQ(0, lround(derived[14])); |
+} |
+ |
} |