Chromium Code Reviews | Index: components/dom_distiller/core/page_features_unittest.cc |
| diff --git a/components/dom_distiller/core/page_features_unittest.cc b/components/dom_distiller/core/page_features_unittest.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..1bc59f2070006bcc4f40b661f3e9b0cb56da160c |
| --- /dev/null |
| +++ b/components/dom_distiller/core/page_features_unittest.cc |
| @@ -0,0 +1,90 @@ |
| +// Copyright 2015 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "components/dom_distiller/core/page_features.h" |
| + |
| +#include <string> |
| +#include <vector> |
| + |
| +#include "base/files/file_util.h" |
| +#include "base/json/json_reader.h" |
| +#include "base/memory/scoped_ptr.h" |
| +#include "base/path_service.h" |
| +#include "testing/gtest/include/gtest/gtest.h" |
| + |
| +namespace dom_distiller { |
| + |
| +// This test uses input data of core features and the output of the training |
| +// pipeline's derived feature extraction to ensure that the extraction that is |
| +// done in Chromium matches that in the training pipeline. |
| +TEST(DomDistillerPageFeaturesTest, TestCalculateDerivedFeatures) { |
| + base::FilePath dir_source_root; |
| + EXPECT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &dir_source_root)); |
| + std::string input_data; |
| + ASSERT_TRUE(base::ReadFileToString( |
| + dir_source_root.AppendASCII( |
| + "components/test/data/dom_distiller/core_features.json"), |
| + &input_data)); |
| + std::string expected_output_data; |
| + // This file contains the output from the calculation of derived features in |
| + // the training pipeline. |
| + ASSERT_TRUE(base::ReadFileToString( |
| + dir_source_root.AppendASCII( |
| + "components/test/data/dom_distiller/derived_features.json"), |
| + &expected_output_data)); |
| + |
| + scoped_ptr<base::Value> input_json(base::JSONReader::Read(input_data)); |
| + ASSERT_TRUE(input_json); |
| + |
| + scoped_ptr<base::Value> expected_output_json( |
| + base::JSONReader::Read(expected_output_data)); |
| + ASSERT_TRUE(expected_output_json); |
| + |
| + base::ListValue* input_entries; |
| + ASSERT_TRUE(input_json->GetAsList(&input_entries)); |
| + ASSERT_GT(input_entries->GetSize(), 0u); |
| + |
| + base::ListValue* expected_output_entries; |
| + ASSERT_TRUE(expected_output_json->GetAsList(&expected_output_entries)); |
| + ASSERT_EQ(expected_output_entries->GetSize(), input_entries->GetSize()); |
| + |
| + // In the output, the features list is a sequence of labels followed by values |
| + // (so labels at even indices, values at odd indices). |
| + base::DictionaryValue* entry; |
| + base::ListValue* derived_features; |
| + ASSERT_TRUE(expected_output_entries->GetDictionary(0, &entry)); |
| + ASSERT_TRUE(entry->GetList("features", &derived_features)); |
| + std::vector<std::string> labels; |
| + for (size_t i = 0; i < derived_features->GetSize(); i += 2) { |
| + std::string label; |
| + ASSERT_TRUE(derived_features->GetString(i, &label)); |
| + labels.push_back(label); |
| + } |
| + |
| + for (size_t i = 0; i < input_entries->GetSize(); ++i) { |
| + base::DictionaryValue* core_features; |
| + ASSERT_TRUE(input_entries->GetDictionary(i, &entry)); |
| + ASSERT_TRUE(entry->GetDictionary("features", &core_features)); |
| + std::vector<double> derived( |
| + CalculateDerivedFeaturesFromJSON(core_features)); |
| + |
| + ASSERT_EQ(labels.size(), derived.size()); |
| + ASSERT_TRUE(expected_output_entries->GetDictionary(i, &entry)); |
| + ASSERT_TRUE(entry->GetList("features", &derived_features)); |
| + for (size_t j = 0; j < derived.size(); ++j) { |
| + double expected_value; |
| + if (!derived_features->GetDouble(j * 2 + 1, &expected_value)) { |
|
nyquist
2015/04/01 01:19:14
Nit: int value_index = j * 2 + 1;
Use here and below.
cjhopman
2015/04/01 20:12:13
Done.
|
| + bool bool_value; |
| + ASSERT_TRUE(derived_features->GetBoolean(j * 2 + 1, &bool_value)); |
| + expected_value = bool_value ? 1.0 : 0.0; |
| + } |
| + std::string entry_url; |
|
nyquist
2015/04/01 01:19:14
Nit: Could this and filling it with the url string be moved out of the inner loop? [ending inferred; original comment truncated]
cjhopman
2015/04/01 20:12:13
Done.
|
| + ASSERT_TRUE(entry->GetString("url", &entry_url)); |
| + EXPECT_DOUBLE_EQ(derived[j], expected_value) |
| + << "incorrect value for entry with url " << entry_url |
| + << " for derived feature " << labels[j]; |
| + } |
| + } |
| +} |
| +} |