| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/dom_distiller/core/page_features.h" | 5 #include "components/dom_distiller/core/page_features.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 | 8 |
| 9 #include <memory> | 9 #include <memory> |
| 10 #include <string> | 10 #include <string> |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 63 std::vector<double> CalculateDerivedFeatures(bool isOGArticle, | 63 std::vector<double> CalculateDerivedFeatures(bool isOGArticle, |
| 64 const GURL& url, | 64 const GURL& url, |
| 65 double numElements, | 65 double numElements, |
| 66 double numAnchors, | 66 double numAnchors, |
| 67 double numForms, | 67 double numForms, |
| 68 const std::string& innerText, | 68 const std::string& innerText, |
| 69 const std::string& textContent, | 69 const std::string& textContent, |
| 70 const std::string& innerHTML) { | 70 const std::string& innerHTML) { |
| 71 // In the training pipeline, the strings are explicitly encoded in utf-8 (as | 71 // In the training pipeline, the strings are explicitly encoded in utf-8 (as |
| 72 // they are here). | 72 // they are here). |
| 73 const std::string& path = url.path(); | 73 const std::string& path = url.path().as_string(); |
| 74 int innerTextWords = GetWordCount(innerText); | 74 int innerTextWords = GetWordCount(innerText); |
| 75 int textContentWords = GetWordCount(textContent); | 75 int textContentWords = GetWordCount(textContent); |
| 76 int innerHTMLWords = GetWordCount(innerHTML); | 76 int innerHTMLWords = GetWordCount(innerHTML); |
| 77 std::vector<double> features; | 77 std::vector<double> features; |
| 78 // 'opengraph', opengraph, | 78 // 'opengraph', opengraph, |
| 79 features.push_back(isOGArticle); | 79 features.push_back(isOGArticle); |
| 80 // 'forum', 'forum' in path, | 80 // 'forum', 'forum' in path, |
| 81 features.push_back(Contains("forum", path)); | 81 features.push_back(Contains("forum", path)); |
| 82 // 'index', 'index' in path, | 82 // 'index', 'index' in path, |
| 83 features.push_back(Contains("index", path)); | 83 features.push_back(Contains("index", path)); |
| (...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 189 | 189 |
| 190 std::vector<double> CalculateDerivedFeatures( | 190 std::vector<double> CalculateDerivedFeatures( |
| 191 bool openGraph, | 191 bool openGraph, |
| 192 const GURL& url, | 192 const GURL& url, |
| 193 unsigned elementCount, | 193 unsigned elementCount, |
| 194 unsigned anchorCount, | 194 unsigned anchorCount, |
| 195 unsigned formCount, | 195 unsigned formCount, |
| 196 double mozScore, | 196 double mozScore, |
| 197 double mozScoreAllSqrt, | 197 double mozScoreAllSqrt, |
| 198 double mozScoreAllLinear) { | 198 double mozScoreAllLinear) { |
| 199 const std::string& path = url.path(); | 199 const std::string& path = url.path().as_string(); |
| 200 std::vector<double> features; | 200 std::vector<double> features; |
| 201 // 'opengraph', opengraph, | 201 // 'opengraph', opengraph, |
| 202 features.push_back(openGraph); | 202 features.push_back(openGraph); |
| 203 // 'forum', 'forum' in path, | 203 // 'forum', 'forum' in path, |
| 204 features.push_back(Contains("forum", path)); | 204 features.push_back(Contains("forum", path)); |
| 205 // 'index', 'index' in path, | 205 // 'index', 'index' in path, |
| 206 features.push_back(Contains("index", path)); | 206 features.push_back(Contains("index", path)); |
| 207 // 'search', 'search' in path, | 207 // 'search', 'search' in path, |
| 208 features.push_back(Contains("search", path)); | 208 features.push_back(Contains("search", path)); |
| 209 // 'view', 'view' in path, | 209 // 'view', 'view' in path, |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 241 features.push_back(mozScore); | 241 features.push_back(mozScore); |
| 242 // 'mozScoreAllSqrt' | 242 // 'mozScoreAllSqrt' |
| 243 features.push_back(mozScoreAllSqrt); | 243 features.push_back(mozScoreAllSqrt); |
| 244 // 'mozScoreAllLinear' | 244 // 'mozScoreAllLinear' |
| 245 features.push_back(mozScoreAllLinear); | 245 features.push_back(mozScoreAllLinear); |
| 246 | 246 |
| 247 return features; | 247 return features; |
| 248 } | 248 } |
| 249 | 249 |
| 250 } // namespace dom_distiller | 250 } // namespace dom_distiller |
| OLD | NEW |