Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(219)

Side by Side Diff: components/dom_distiller/core/page_features_unittest.cc

Issue 1248643004: Test distillability without JavaScript (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@early
Patch Set: fix browsertest, merge webkit CL, merge http://crrev.com/1403413004 Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/dom_distiller/core/page_features.h" 5 #include "components/dom_distiller/core/page_features.h"
6 6
7 #include <string> 7 #include <string>
8 #include <vector> 8 #include <vector>
9 9
10 #include "base/files/file_util.h" 10 #include "base/files/file_util.h"
11 #include "base/json/json_reader.h" 11 #include "base/json/json_reader.h"
12 #include "base/json/json_writer.h" 12 #include "base/json/json_writer.h"
13 #include "base/memory/scoped_ptr.h" 13 #include "base/memory/scoped_ptr.h"
14 #include "base/path_service.h" 14 #include "base/path_service.h"
15 #include "testing/gtest/include/gtest/gtest.h" 15 #include "testing/gtest/include/gtest/gtest.h"
16 #include "third_party/WebKit/public/platform/WebDistillability.h"
17 #include "url/gurl.h"
16 18
17 namespace dom_distiller { 19 namespace dom_distiller {
18 20
19 // This test uses input data of core features and the output of the training 21 // This test uses input data of core features and the output of the training
20 // pipeline's derived feature extraction to ensure that the extraction that is 22 // pipeline's derived feature extraction to ensure that the extraction that is
21 // done in Chromium matches that in the training pipeline. 23 // done in Chromium matches that in the training pipeline.
22 TEST(DomDistillerPageFeaturesTest, TestCalculateDerivedFeatures) {
23 base::FilePath dir_source_root;
24 EXPECT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &dir_source_root));
25 std::string input_data;
26 ASSERT_TRUE(base::ReadFileToString(
27 dir_source_root.AppendASCII(
28 "components/test/data/dom_distiller/core_features.json"),
29 &input_data));
30 std::string expected_output_data;
31 // This file contains the output from the calculation of derived features in
32 // the training pipeline.
33 ASSERT_TRUE(base::ReadFileToString(
34 dir_source_root.AppendASCII(
35 "components/test/data/dom_distiller/derived_features.json"),
36 &expected_output_data));
37 24
38 scoped_ptr<base::Value> input_json = base::JSONReader::Read(input_data); 25 TEST(DomDistillerPageFeaturesTest, TestPath) {
39 ASSERT_TRUE(input_json); 26 blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures();
40 27
41 scoped_ptr<base::Value> expected_output_json = 28 GURL url("http://example.com/search/view/index/the-title-of-archive.php");
42 base::JSONReader::Read(expected_output_data);
43 ASSERT_TRUE(expected_output_json);
44 29
45 base::ListValue* input_entries; 30 std::vector<double> derived(CalculateDerivedFeatures(f, url));
46 ASSERT_TRUE(input_json->GetAsList(&input_entries)); 31 EXPECT_EQ(kDerivedFeaturesCount, derived.size());
47 ASSERT_GT(input_entries->GetSize(), 0u);
48 32
49 base::ListValue* expected_output_entries; 33 EXPECT_EQ(0, lround(derived[1]));
50 ASSERT_TRUE(expected_output_json->GetAsList(&expected_output_entries)); 34 EXPECT_EQ(1, lround(derived[2]));
51 ASSERT_EQ(expected_output_entries->GetSize(), input_entries->GetSize()); 35 EXPECT_EQ(1, lround(derived[3]));
36 EXPECT_EQ(1, lround(derived[4]));
37 EXPECT_EQ(1, lround(derived[5]));
38 EXPECT_EQ(0, lround(derived[6]));
39 EXPECT_EQ(0, lround(derived[7]));
40 EXPECT_EQ(1, lround(derived[8]));
41 EXPECT_EQ(43, lround(derived[9]));
42 EXPECT_EQ(0, lround(derived[10]));
43 EXPECT_EQ(4, lround(derived[11]));
44 EXPECT_EQ(4, lround(derived[12]));
45 EXPECT_EQ(0, lround(derived[13]));
46 EXPECT_EQ(24, lround(derived[14]));
47 }
52 48
53 // In the output, the features list is a sequence of labels followed by values 49 TEST(DomDistillerPageFeaturesTest, TestPath2) {
54 // (so labels at even indices, values at odd indices). 50 blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures();
55 base::DictionaryValue* entry;
56 base::ListValue* derived_features;
57 ASSERT_TRUE(expected_output_entries->GetDictionary(0, &entry));
58 ASSERT_TRUE(entry->GetList("features", &derived_features));
59 std::vector<std::string> labels;
60 for (size_t i = 0; i < derived_features->GetSize(); i += 2) {
61 std::string label;
62 ASSERT_TRUE(derived_features->GetString(i, &label));
63 labels.push_back(label);
64 }
65 51
66 for (size_t i = 0; i < input_entries->GetSize(); ++i) { 52 GURL url("http://example.com/phpbb/forum123/456.asp");
67 base::DictionaryValue* core_features;
68 ASSERT_TRUE(input_entries->GetDictionary(i, &entry));
69 ASSERT_TRUE(entry->GetDictionary("features", &core_features));
70 // CalculateDerivedFeaturesFromJSON expects a base::Value of the stringified
71 // JSON (and not a base::Value of the JSON itself)
72 std::string stringified_json;
73 ASSERT_TRUE(base::JSONWriter::Write(*core_features, &stringified_json));
74 scoped_ptr<base::Value> stringified_value(
75 new base::StringValue(stringified_json));
76 std::vector<double> derived(
77 CalculateDerivedFeaturesFromJSON(stringified_value.get()));
78 53
79 ASSERT_EQ(labels.size(), derived.size()); 54 std::vector<double> derived(CalculateDerivedFeatures(f, url));
80 ASSERT_TRUE(expected_output_entries->GetDictionary(i, &entry)); 55 EXPECT_EQ(kDerivedFeaturesCount, derived.size());
81 ASSERT_TRUE(entry->GetList("features", &derived_features)); 56 EXPECT_EQ(1, lround(derived[1]));
82 std::string entry_url; 57 EXPECT_EQ(0, lround(derived[2]));
83 ASSERT_TRUE(entry->GetString("url", &entry_url)); 58 EXPECT_EQ(0, lround(derived[3]));
84 for (size_t j = 0, value_index = 1; j < derived.size(); 59 EXPECT_EQ(0, lround(derived[4]));
85 ++j, value_index += 2) { 60 EXPECT_EQ(0, lround(derived[5]));
86 double expected_value; 61 EXPECT_EQ(1, lround(derived[6]));
87 if (!derived_features->GetDouble(value_index, &expected_value)) { 62 EXPECT_EQ(1, lround(derived[7]));
88 bool bool_value; 63 EXPECT_EQ(0, lround(derived[8]));
89 ASSERT_TRUE(derived_features->GetBoolean(value_index, &bool_value)); 64 EXPECT_EQ(23, lround(derived[9]));
90 expected_value = bool_value ? 1.0 : 0.0; 65 EXPECT_EQ(0, lround(derived[10]));
91 } 66 EXPECT_EQ(3, lround(derived[11]));
92 EXPECT_DOUBLE_EQ(derived[j], expected_value) 67 EXPECT_EQ(1, lround(derived[12]));
93 << "incorrect value for entry with url " << entry_url 68 EXPECT_EQ(2, lround(derived[13]));
94 << " for derived feature " << labels[j]; 69 EXPECT_EQ(7, lround(derived[14]));
95 }
96 }
97 } 70 }
71
72 TEST(DomDistillerPageFeaturesTest, TestPath3) {
73 blink::WebDistillabilityFeatures f = blink::WebDistillabilityFeatures();
74
75 GURL url("https://example.com/");
76
77 std::vector<double> derived(CalculateDerivedFeatures(f, url));
78 EXPECT_EQ(kDerivedFeaturesCount, derived.size());
79 EXPECT_EQ(0, lround(derived[1]));
80 EXPECT_EQ(0, lround(derived[2]));
81 EXPECT_EQ(0, lround(derived[3]));
82 EXPECT_EQ(0, lround(derived[4]));
83 EXPECT_EQ(0, lround(derived[5]));
84 EXPECT_EQ(0, lround(derived[6]));
85 EXPECT_EQ(0, lround(derived[7]));
86 EXPECT_EQ(0, lround(derived[8]));
87 EXPECT_EQ(1, lround(derived[9]));
88 EXPECT_EQ(1, lround(derived[10]));
89 EXPECT_EQ(0, lround(derived[11]));
90 EXPECT_EQ(0, lround(derived[12]));
91 EXPECT_EQ(0, lround(derived[13]));
92 EXPECT_EQ(0, lround(derived[14]));
98 } 93 }
94
95 }
OLD | NEW
« no previous file with comments | « components/dom_distiller/core/page_features.cc ('k') | components/test/data/dom_distiller/core_features.json » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698