OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/dom_distiller/core/page_features.h" | 5 #include "components/dom_distiller/core/page_features.h" |
6 | 6 |
7 #include <string> | 7 #include <string> |
8 #include <vector> | 8 #include <vector> |
9 | 9 |
10 #include "base/files/file_util.h" | 10 #include "base/files/file_util.h" |
11 #include "base/json/json_reader.h" | 11 #include "base/json/json_reader.h" |
12 #include "base/json/json_writer.h" | 12 #include "base/json/json_writer.h" |
13 #include "base/memory/scoped_ptr.h" | 13 #include "base/memory/scoped_ptr.h" |
14 #include "base/path_service.h" | 14 #include "base/path_service.h" |
15 #include "testing/gtest/include/gtest/gtest.h" | 15 #include "testing/gtest/include/gtest/gtest.h" |
| 16 #include "third_party/WebKit/public/platform/WebDistillability.h" |
| 17 #include "url/gurl.h" |
16 | 18 |
17 namespace dom_distiller { | 19 namespace dom_distiller { |
18 | 20 |
19 // This test uses input data of core features and the output of the training | 21 // This test uses input data of core features and the output of the training |
20 // pipeline's derived feature extraction to ensure that the extraction that is | 22 // pipeline's derived feature extraction to ensure that the extraction that is |
21 // done in Chromium matches that in the training pipeline. | 23 // done in Chromium matches that in the training pipeline. |
22 TEST(DomDistillerPageFeaturesTest, TestCalculateDerivedFeatures) { | |
23 base::FilePath dir_source_root; | |
24 EXPECT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &dir_source_root)); | |
25 std::string input_data; | |
26 ASSERT_TRUE(base::ReadFileToString( | |
27 dir_source_root.AppendASCII( | |
28 "components/test/data/dom_distiller/core_features.json"), | |
29 &input_data)); | |
30 std::string expected_output_data; | |
31 // This file contains the output from the calculation of derived features in | |
32 // the training pipeline. | |
33 ASSERT_TRUE(base::ReadFileToString( | |
34 dir_source_root.AppendASCII( | |
35 "components/test/data/dom_distiller/derived_features.json"), | |
36 &expected_output_data)); | |
37 | 24 |
38 scoped_ptr<base::Value> input_json = base::JSONReader::Read(input_data); | 25 TEST(DomDistillerPageFeaturesTest, TestPath) { |
39 ASSERT_TRUE(input_json); | 26 blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures(); |
40 | 27 |
41 scoped_ptr<base::Value> expected_output_json = | 28 GURL url("http://example.com/search/view/index/the-title-of-archive.php"); |
42 base::JSONReader::Read(expected_output_data); | |
43 ASSERT_TRUE(expected_output_json); | |
44 | 29 |
45 base::ListValue* input_entries; | 30 std::vector<double> derived(CalculateDerivedFeatures(f, url)); |
46 ASSERT_TRUE(input_json->GetAsList(&input_entries)); | 31 EXPECT_EQ(kDerivedFeaturesCount, derived.size()); |
47 ASSERT_GT(input_entries->GetSize(), 0u); | |
48 | 32 |
49 base::ListValue* expected_output_entries; | 33 EXPECT_EQ(0, lround(derived[1])); |
50 ASSERT_TRUE(expected_output_json->GetAsList(&expected_output_entries)); | 34 EXPECT_EQ(1, lround(derived[2])); |
51 ASSERT_EQ(expected_output_entries->GetSize(), input_entries->GetSize()); | 35 EXPECT_EQ(1, lround(derived[3])); |
| 36 EXPECT_EQ(1, lround(derived[4])); |
| 37 EXPECT_EQ(1, lround(derived[5])); |
| 38 EXPECT_EQ(0, lround(derived[6])); |
| 39 EXPECT_EQ(0, lround(derived[7])); |
| 40 EXPECT_EQ(1, lround(derived[8])); |
| 41 EXPECT_EQ(43, lround(derived[9])); |
| 42 EXPECT_EQ(0, lround(derived[10])); |
| 43 EXPECT_EQ(4, lround(derived[11])); |
| 44 EXPECT_EQ(4, lround(derived[12])); |
| 45 EXPECT_EQ(0, lround(derived[13])); |
| 46 EXPECT_EQ(24, lround(derived[14])); |
| 47 } |
52 | 48 |
53 // In the output, the features list is a sequence of labels followed by values | 49 TEST(DomDistillerPageFeaturesTest, TestPath2) { |
54 // (so labels at even indices, values at odd indices). | 50 blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures(); |
55 base::DictionaryValue* entry; | |
56 base::ListValue* derived_features; | |
57 ASSERT_TRUE(expected_output_entries->GetDictionary(0, &entry)); | |
58 ASSERT_TRUE(entry->GetList("features", &derived_features)); | |
59 std::vector<std::string> labels; | |
60 for (size_t i = 0; i < derived_features->GetSize(); i += 2) { | |
61 std::string label; | |
62 ASSERT_TRUE(derived_features->GetString(i, &label)); | |
63 labels.push_back(label); | |
64 } | |
65 | 51 |
66 for (size_t i = 0; i < input_entries->GetSize(); ++i) { | 52 GURL url("http://example.com/phpbb/forum123/456.asp"); |
67 base::DictionaryValue* core_features; | |
68 ASSERT_TRUE(input_entries->GetDictionary(i, &entry)); | |
69 ASSERT_TRUE(entry->GetDictionary("features", &core_features)); | |
70 // CalculateDerivedFeaturesFromJSON expects a base::Value of the stringified | |
71 // JSON (and not a base::Value of the JSON itself) | |
72 std::string stringified_json; | |
73 ASSERT_TRUE(base::JSONWriter::Write(*core_features, &stringified_json)); | |
74 scoped_ptr<base::Value> stringified_value( | |
75 new base::StringValue(stringified_json)); | |
76 std::vector<double> derived( | |
77 CalculateDerivedFeaturesFromJSON(stringified_value.get())); | |
78 | 53 |
79 ASSERT_EQ(labels.size(), derived.size()); | 54 std::vector<double> derived(CalculateDerivedFeatures(f, url)); |
80 ASSERT_TRUE(expected_output_entries->GetDictionary(i, &entry)); | 55 EXPECT_EQ(kDerivedFeaturesCount, derived.size()); |
81 ASSERT_TRUE(entry->GetList("features", &derived_features)); | 56 EXPECT_EQ(1, lround(derived[1])); |
82 std::string entry_url; | 57 EXPECT_EQ(0, lround(derived[2])); |
83 ASSERT_TRUE(entry->GetString("url", &entry_url)); | 58 EXPECT_EQ(0, lround(derived[3])); |
84 for (size_t j = 0, value_index = 1; j < derived.size(); | 59 EXPECT_EQ(0, lround(derived[4])); |
85 ++j, value_index += 2) { | 60 EXPECT_EQ(0, lround(derived[5])); |
86 double expected_value; | 61 EXPECT_EQ(1, lround(derived[6])); |
87 if (!derived_features->GetDouble(value_index, &expected_value)) { | 62 EXPECT_EQ(1, lround(derived[7])); |
88 bool bool_value; | 63 EXPECT_EQ(0, lround(derived[8])); |
89 ASSERT_TRUE(derived_features->GetBoolean(value_index, &bool_value)); | 64 EXPECT_EQ(23, lround(derived[9])); |
90 expected_value = bool_value ? 1.0 : 0.0; | 65 EXPECT_EQ(0, lround(derived[10])); |
91 } | 66 EXPECT_EQ(3, lround(derived[11])); |
92 EXPECT_DOUBLE_EQ(derived[j], expected_value) | 67 EXPECT_EQ(1, lround(derived[12])); |
93 << "incorrect value for entry with url " << entry_url | 68 EXPECT_EQ(2, lround(derived[13])); |
94 << " for derived feature " << labels[j]; | 69 EXPECT_EQ(7, lround(derived[14])); |
95 } | |
96 } | |
97 } | 70 } |
| 71 |
| 72 TEST(DomDistillerPageFeaturesTest, TestPath3) { |
| 73 blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures(); |
| 74 |
| 75 GURL url("https://example.com/"); |
| 76 |
| 77 std::vector<double> derived(CalculateDerivedFeatures(f, url)); |
| 78 EXPECT_EQ(kDerivedFeaturesCount, derived.size()); |
| 79 EXPECT_EQ(0, lround(derived[1])); |
| 80 EXPECT_EQ(0, lround(derived[2])); |
| 81 EXPECT_EQ(0, lround(derived[3])); |
| 82 EXPECT_EQ(0, lround(derived[4])); |
| 83 EXPECT_EQ(0, lround(derived[5])); |
| 84 EXPECT_EQ(0, lround(derived[6])); |
| 85 EXPECT_EQ(0, lround(derived[7])); |
| 86 EXPECT_EQ(0, lround(derived[8])); |
| 87 EXPECT_EQ(1, lround(derived[9])); |
| 88 EXPECT_EQ(1, lround(derived[10])); |
| 89 EXPECT_EQ(0, lround(derived[11])); |
| 90 EXPECT_EQ(0, lround(derived[12])); |
| 91 EXPECT_EQ(0, lround(derived[13])); |
| 92 EXPECT_EQ(0, lround(derived[14])); |
98 } | 93 } |
| 94 |
| 95 } |
OLD | NEW |