Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(413)

Side by Side Diff: components/dom_distiller/core/page_features_unittest.cc

Issue 1409133007: Add a new set of page features for distillability testing (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@webkit
Patch Set: fix DEPS Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/dom_distiller/core/page_features.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/dom_distiller/core/page_features.h" 5 #include "components/dom_distiller/core/page_features.h"
6 6
7 #include <string> 7 #include <string>
8 #include <vector> 8 #include <vector>
9 9
10 #include "base/files/file_util.h" 10 #include "base/files/file_util.h"
11 #include "base/json/json_reader.h" 11 #include "base/json/json_reader.h"
12 #include "base/json/json_writer.h" 12 #include "base/json/json_writer.h"
13 #include "base/memory/scoped_ptr.h" 13 #include "base/memory/scoped_ptr.h"
14 #include "base/path_service.h" 14 #include "base/path_service.h"
15 #include "testing/gtest/include/gtest/gtest.h" 15 #include "testing/gtest/include/gtest/gtest.h"
16 #include "url/gurl.h"
16 17
17 namespace dom_distiller { 18 namespace dom_distiller {
18 19
19 // This test uses input data of core features and the output of the training 20 // This test uses input data of core features and the output of the training
20 // pipeline's derived feature extraction to ensure that the extraction that is 21 // pipeline's derived feature extraction to ensure that the extraction that is
21 // done in Chromium matches that in the training pipeline. 22 // done in Chromium matches that in the training pipeline.
22 TEST(DomDistillerPageFeaturesTest, TestCalculateDerivedFeatures) { 23 TEST(DomDistillerPageFeaturesTest, TestCalculateDerivedFeatures) {
23 base::FilePath dir_source_root; 24 base::FilePath dir_source_root;
24 EXPECT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &dir_source_root)); 25 EXPECT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &dir_source_root));
25 std::string input_data; 26 std::string input_data;
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
88 bool bool_value; 89 bool bool_value;
89 ASSERT_TRUE(derived_features->GetBoolean(value_index, &bool_value)); 90 ASSERT_TRUE(derived_features->GetBoolean(value_index, &bool_value));
90 expected_value = bool_value ? 1.0 : 0.0; 91 expected_value = bool_value ? 1.0 : 0.0;
91 } 92 }
92 EXPECT_DOUBLE_EQ(derived[j], expected_value) 93 EXPECT_DOUBLE_EQ(derived[j], expected_value)
93 << "incorrect value for entry with url " << entry_url 94 << "incorrect value for entry with url " << entry_url
94 << " for derived feature " << labels[j]; 95 << " for derived feature " << labels[j];
95 } 96 }
96 } 97 }
97 } 98 }
99
// Test helper: calls CalculateDerivedFeatures() with |url| as the only
// non-trivial input. Every other argument is passed as false or 0 (see the
// per-argument comments below), so callers vary only the URL.
// Returns the vector produced by CalculateDerivedFeatures() unchanged.
100 std::vector<double> DeriveFromPath(const GURL& url) {
101 return CalculateDerivedFeatures(
102 false, // bool openGraph
103 url, // const GURL& url
104 0, // unsigned elementCount
105 0, // unsigned anchorCount
106 0, // unsigned formCount
107 0, // double mozScore
108 0, // double mozScoreAllSqrt
109 0 // double mozScoreAllLinear
110 );
98 } 111 }
112
// Checks entries 1 through 14 of the vector returned by DeriveFromPath() for a
// URL whose path has several segments and ends in a hyphenated ".php" file
// name. Each entry is rounded with lround() and compared to an integer.
// NOTE(review): entry 0 is not checked, and the vector's size is not asserted
// before indexing -- confirm CalculateDerivedFeatures() always returns at
// least 15 entries, or add a size assertion.
// NOTE(review): lround() is used but <cmath> is not among the includes visible
// in this file -- presumably it arrives transitively; confirm.
// NOTE(review): the meaning of each index is not visible here; presumably it
// follows the order used inside CalculateDerivedFeatures() -- verify.
113 TEST(DomDistillerPageFeaturesTest, TestPath) {
114 GURL url("http://example.com/search/view/index/the-title-of-archive.php");
115
116 std::vector<double> derived(DeriveFromPath(url));
117 EXPECT_EQ(0, lround(derived[1]));
118 EXPECT_EQ(1, lround(derived[2]));
119 EXPECT_EQ(1, lround(derived[3]));
120 EXPECT_EQ(1, lround(derived[4]));
121 EXPECT_EQ(1, lround(derived[5]));
122 EXPECT_EQ(0, lround(derived[6]));
123 EXPECT_EQ(0, lround(derived[7]));
124 EXPECT_EQ(1, lround(derived[8]));
125 EXPECT_EQ(43, lround(derived[9]));
126 EXPECT_EQ(0, lround(derived[10]));
127 EXPECT_EQ(4, lround(derived[11]));
128 EXPECT_EQ(4, lround(derived[12]));
129 EXPECT_EQ(0, lround(derived[13]));
130 EXPECT_EQ(24, lround(derived[14]));
131 }
132
// Checks entries 1 through 14 of the vector returned by DeriveFromPath() for a
// URL whose path contains "phpbb", "forum123" and a numeric ".asp" file name.
// Each entry is rounded with lround() and compared to an integer.
// NOTE(review): entry 0 is not checked, and the vector's size is not asserted
// before indexing -- confirm CalculateDerivedFeatures() always returns at
// least 15 entries, or add a size assertion.
133 TEST(DomDistillerPageFeaturesTest, TestPath2) {
134 GURL url("http://example.com/phpbb/forum123/456.asp");
135
136 std::vector<double> derived(DeriveFromPath(url));
137 EXPECT_EQ(1, lround(derived[1]));
138 EXPECT_EQ(0, lround(derived[2]));
139 EXPECT_EQ(0, lround(derived[3]));
140 EXPECT_EQ(0, lround(derived[4]));
141 EXPECT_EQ(0, lround(derived[5]));
142 EXPECT_EQ(1, lround(derived[6]));
143 EXPECT_EQ(1, lround(derived[7]));
144 EXPECT_EQ(0, lround(derived[8]));
145 EXPECT_EQ(23, lround(derived[9]));
146 EXPECT_EQ(0, lround(derived[10]));
147 EXPECT_EQ(3, lround(derived[11]));
148 EXPECT_EQ(1, lround(derived[12]));
149 EXPECT_EQ(2, lround(derived[13]));
150 EXPECT_EQ(7, lround(derived[14]));
151 }
152
// Checks entries 1 through 14 of the vector returned by DeriveFromPath() for
// an https URL whose path is just "/". Only entries 9 and 10 are expected to
// round to 1; every other checked entry is expected to round to 0.
// NOTE(review): entry 0 is not checked, and the vector's size is not asserted
// before indexing -- confirm CalculateDerivedFeatures() always returns at
// least 15 entries, or add a size assertion.
153 TEST(DomDistillerPageFeaturesTest, TestPath3) {
154 GURL url("https://example.com/");
155
156 std::vector<double> derived(DeriveFromPath(url));
157 EXPECT_EQ(0, lround(derived[1]));
158 EXPECT_EQ(0, lround(derived[2]));
159 EXPECT_EQ(0, lround(derived[3]));
160 EXPECT_EQ(0, lround(derived[4]));
161 EXPECT_EQ(0, lround(derived[5]));
162 EXPECT_EQ(0, lround(derived[6]));
163 EXPECT_EQ(0, lround(derived[7]));
164 EXPECT_EQ(0, lround(derived[8]));
165 EXPECT_EQ(1, lround(derived[9]));
166 EXPECT_EQ(1, lround(derived[10]));
167 EXPECT_EQ(0, lround(derived[11]));
168 EXPECT_EQ(0, lround(derived[12]));
169 EXPECT_EQ(0, lround(derived[13]));
170 EXPECT_EQ(0, lround(derived[14]));
171 }
172 }
OLDNEW
« no previous file with comments | « components/dom_distiller/core/page_features.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698