OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/dom_distiller/core/page_features.h" | 5 #include "components/dom_distiller/core/page_features.h" |
6 | 6 |
7 #include <string> | 7 #include <string> |
8 #include <vector> | 8 #include <vector> |
9 | 9 |
10 #include "base/files/file_util.h" | 10 #include "base/files/file_util.h" |
11 #include "base/json/json_reader.h" | 11 #include "base/json/json_reader.h" |
12 #include "base/json/json_writer.h" | 12 #include "base/json/json_writer.h" |
13 #include "base/memory/scoped_ptr.h" | 13 #include "base/memory/scoped_ptr.h" |
14 #include "base/path_service.h" | 14 #include "base/path_service.h" |
15 #include "testing/gtest/include/gtest/gtest.h" | 15 #include "testing/gtest/include/gtest/gtest.h" |
| 16 #include "url/gurl.h" |
16 | 17 |
17 namespace dom_distiller { | 18 namespace dom_distiller { |
18 | 19 |
19 // This test uses input data of core features and the output of the training | 20 // This test uses input data of core features and the output of the training |
20 // pipeline's derived feature extraction to ensure that the extraction that is | 21 // pipeline's derived feature extraction to ensure that the extraction that is |
21 // done in Chromium matches that in the training pipeline. | 22 // done in Chromium matches that in the training pipeline. |
22 TEST(DomDistillerPageFeaturesTest, TestCalculateDerivedFeatures) { | 23 TEST(DomDistillerPageFeaturesTest, TestCalculateDerivedFeatures) { |
23 base::FilePath dir_source_root; | 24 base::FilePath dir_source_root; |
24 EXPECT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &dir_source_root)); | 25 EXPECT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &dir_source_root)); |
25 std::string input_data; | 26 std::string input_data; |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
88 bool bool_value; | 89 bool bool_value; |
89 ASSERT_TRUE(derived_features->GetBoolean(value_index, &bool_value)); | 90 ASSERT_TRUE(derived_features->GetBoolean(value_index, &bool_value)); |
90 expected_value = bool_value ? 1.0 : 0.0; | 91 expected_value = bool_value ? 1.0 : 0.0; |
91 } | 92 } |
92 EXPECT_DOUBLE_EQ(derived[j], expected_value) | 93 EXPECT_DOUBLE_EQ(derived[j], expected_value) |
93 << "incorrect value for entry with url " << entry_url | 94 << "incorrect value for entry with url " << entry_url |
94 << " for derived feature " << labels[j]; | 95 << " for derived feature " << labels[j]; |
95 } | 96 } |
96 } | 97 } |
97 } | 98 } |
| 99 |
| 100 std::vector<double> DeriveFromPath(const GURL& url) { |
| 101 return CalculateDerivedFeatures( |
| 102 false, // bool openGraph |
| 103 url, // const GURL& url |
| 104 0, // unsigned elementCount |
| 105 0, // unsigned anchorCount |
| 106 0, // unsigned formCount |
| 107 0, // double mozScore |
| 108 0, // double mozScoreAllSqrt |
| 109 0 // double mozScoreAllLinear |
| 110 ); |
98 } | 111 } |
| 112 |
| 113 TEST(DomDistillerPageFeaturesTest, TestPath) { |
| 114 GURL url("http://example.com/search/view/index/the-title-of-archive.php"); |
| 115 |
| 116 std::vector<double> derived(DeriveFromPath(url)); |
| 117 EXPECT_EQ(0, lround(derived[1])); |
| 118 EXPECT_EQ(1, lround(derived[2])); |
| 119 EXPECT_EQ(1, lround(derived[3])); |
| 120 EXPECT_EQ(1, lround(derived[4])); |
| 121 EXPECT_EQ(1, lround(derived[5])); |
| 122 EXPECT_EQ(0, lround(derived[6])); |
| 123 EXPECT_EQ(0, lround(derived[7])); |
| 124 EXPECT_EQ(1, lround(derived[8])); |
| 125 EXPECT_EQ(43, lround(derived[9])); |
| 126 EXPECT_EQ(0, lround(derived[10])); |
| 127 EXPECT_EQ(4, lround(derived[11])); |
| 128 EXPECT_EQ(4, lround(derived[12])); |
| 129 EXPECT_EQ(0, lround(derived[13])); |
| 130 EXPECT_EQ(24, lround(derived[14])); |
| 131 } |
| 132 |
| 133 TEST(DomDistillerPageFeaturesTest, TestPath2) { |
| 134 GURL url("http://example.com/phpbb/forum123/456.asp"); |
| 135 |
| 136 std::vector<double> derived(DeriveFromPath(url)); |
| 137 EXPECT_EQ(1, lround(derived[1])); |
| 138 EXPECT_EQ(0, lround(derived[2])); |
| 139 EXPECT_EQ(0, lround(derived[3])); |
| 140 EXPECT_EQ(0, lround(derived[4])); |
| 141 EXPECT_EQ(0, lround(derived[5])); |
| 142 EXPECT_EQ(1, lround(derived[6])); |
| 143 EXPECT_EQ(1, lround(derived[7])); |
| 144 EXPECT_EQ(0, lround(derived[8])); |
| 145 EXPECT_EQ(23, lround(derived[9])); |
| 146 EXPECT_EQ(0, lround(derived[10])); |
| 147 EXPECT_EQ(3, lround(derived[11])); |
| 148 EXPECT_EQ(1, lround(derived[12])); |
| 149 EXPECT_EQ(2, lround(derived[13])); |
| 150 EXPECT_EQ(7, lround(derived[14])); |
| 151 } |
| 152 |
| 153 TEST(DomDistillerPageFeaturesTest, TestPath3) { |
| 154 GURL url("https://example.com/"); |
| 155 |
| 156 std::vector<double> derived(DeriveFromPath(url)); |
| 157 EXPECT_EQ(0, lround(derived[1])); |
| 158 EXPECT_EQ(0, lround(derived[2])); |
| 159 EXPECT_EQ(0, lround(derived[3])); |
| 160 EXPECT_EQ(0, lround(derived[4])); |
| 161 EXPECT_EQ(0, lround(derived[5])); |
| 162 EXPECT_EQ(0, lround(derived[6])); |
| 163 EXPECT_EQ(0, lround(derived[7])); |
| 164 EXPECT_EQ(0, lround(derived[8])); |
| 165 EXPECT_EQ(1, lround(derived[9])); |
| 166 EXPECT_EQ(1, lround(derived[10])); |
| 167 EXPECT_EQ(0, lround(derived[11])); |
| 168 EXPECT_EQ(0, lround(derived[12])); |
| 169 EXPECT_EQ(0, lround(derived[13])); |
| 170 EXPECT_EQ(0, lround(derived[14])); |
| 171 } |
| 172 } |
OLD | NEW |