Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1131)

Side by Side Diff: components/dom_distiller/core/page_features.cc

Issue 2481923002: [WIP] make GURL::path() return a StringPiece (Closed)
Patch Set: thanks asan Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/dom_distiller/core/page_features.h" 5 #include "components/dom_distiller/core/page_features.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 8
9 #include <memory> 9 #include <memory>
10 #include <string> 10 #include <string>
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
63 std::vector<double> CalculateDerivedFeatures(bool isOGArticle, 63 std::vector<double> CalculateDerivedFeatures(bool isOGArticle,
64 const GURL& url, 64 const GURL& url,
65 double numElements, 65 double numElements,
66 double numAnchors, 66 double numAnchors,
67 double numForms, 67 double numForms,
68 const std::string& innerText, 68 const std::string& innerText,
69 const std::string& textContent, 69 const std::string& textContent,
70 const std::string& innerHTML) { 70 const std::string& innerHTML) {
71 // In the training pipeline, the strings are explicitly encoded in utf-8 (as 71 // In the training pipeline, the strings are explicitly encoded in utf-8 (as
72 // they are here). 72 // they are here).
73 const std::string& path = url.path(); 73 const std::string& path = url.path().as_string();
74 int innerTextWords = GetWordCount(innerText); 74 int innerTextWords = GetWordCount(innerText);
75 int textContentWords = GetWordCount(textContent); 75 int textContentWords = GetWordCount(textContent);
76 int innerHTMLWords = GetWordCount(innerHTML); 76 int innerHTMLWords = GetWordCount(innerHTML);
77 std::vector<double> features; 77 std::vector<double> features;
78 // 'opengraph', opengraph, 78 // 'opengraph', opengraph,
79 features.push_back(isOGArticle); 79 features.push_back(isOGArticle);
80 // 'forum', 'forum' in path, 80 // 'forum', 'forum' in path,
81 features.push_back(Contains("forum", path)); 81 features.push_back(Contains("forum", path));
82 // 'index', 'index' in path, 82 // 'index', 'index' in path,
83 features.push_back(Contains("index", path)); 83 features.push_back(Contains("index", path));
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after
189 189
190 std::vector<double> CalculateDerivedFeatures( 190 std::vector<double> CalculateDerivedFeatures(
191 bool openGraph, 191 bool openGraph,
192 const GURL& url, 192 const GURL& url,
193 unsigned elementCount, 193 unsigned elementCount,
194 unsigned anchorCount, 194 unsigned anchorCount,
195 unsigned formCount, 195 unsigned formCount,
196 double mozScore, 196 double mozScore,
197 double mozScoreAllSqrt, 197 double mozScoreAllSqrt,
198 double mozScoreAllLinear) { 198 double mozScoreAllLinear) {
199 const std::string& path = url.path(); 199 const std::string& path = url.path().as_string();
200 std::vector<double> features; 200 std::vector<double> features;
201 // 'opengraph', opengraph, 201 // 'opengraph', opengraph,
202 features.push_back(openGraph); 202 features.push_back(openGraph);
203 // 'forum', 'forum' in path, 203 // 'forum', 'forum' in path,
204 features.push_back(Contains("forum", path)); 204 features.push_back(Contains("forum", path));
205 // 'index', 'index' in path, 205 // 'index', 'index' in path,
206 features.push_back(Contains("index", path)); 206 features.push_back(Contains("index", path));
207 // 'search', 'search' in path, 207 // 'search', 'search' in path,
208 features.push_back(Contains("search", path)); 208 features.push_back(Contains("search", path));
209 // 'view', 'view' in path, 209 // 'view', 'view' in path,
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
241 features.push_back(mozScore); 241 features.push_back(mozScore);
242 // 'mozScoreAllSqrt' 242 // 'mozScoreAllSqrt'
243 features.push_back(mozScoreAllSqrt); 243 features.push_back(mozScoreAllSqrt);
244 // 'mozScoreAllLinear' 244 // 'mozScoreAllLinear'
245 features.push_back(mozScoreAllLinear); 245 features.push_back(mozScoreAllLinear);
246 246
247 return features; 247 return features;
248 } 248 }
249 249
250 } // namespace dom_distiller 250 } // namespace dom_distiller
OLD | NEW
« no previous file with comments | « components/content_settings/core/common/content_settings_pattern.cc ('k') | components/domain_reliability/util.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698