Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(370)

Side by Side Diff: components/dom_distiller/core/page_features.cc

Issue 1680103003: Match the derived features to the training data set (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: style Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | components/dom_distiller/core/page_features_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/dom_distiller/core/page_features.h" 5 #include "components/dom_distiller/core/page_features.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 8
9 #include <string> 9 #include <string>
10 10
11 #include "base/json/json_reader.h" 11 #include "base/json/json_reader.h"
12 #include "third_party/re2/src/re2/re2.h" 12 #include "third_party/re2/src/re2/re2.h"
13 #include "url/gurl.h" 13 #include "url/gurl.h"
14 14
15 namespace dom_distiller { 15 namespace dom_distiller {
16 /* This code needs to derive features in the same way and order in which they 16 /* This code needs to derive features in the same way and order in which they
17 * are derived when training the model. Parts of that code are reproduced in the 17 * are derived when training the model. Parts of that code are reproduced in the
18 * comments below. 18 * comments below.
19 */ 19 */
20 20
21 namespace { 21 namespace {
22 22
23 std::string GetLastSegment(const std::string& path) { 23 std::string GetLastSegment(const std::string& path) {
24 // return re.search('[^/]*\/?$', path).group(0) 24 // return re.search('[^/]*\/?$', path).group(0)
25 if (path.size() == 0) 25 if (path.size() == 0)
26 return ""; 26 return "";
27 size_t start = path.rfind("/", path.size() - 1); 27 if (path.size() == 1) {
28 DCHECK(path[0] == '/');
29 return path;
30 }
31 size_t start = path.rfind("/", path.size() - 2);
28 return start == std::string::npos ? "" : path.substr(start + 1); 32 return start == std::string::npos ? "" : path.substr(start + 1);
29 } 33 }
30 34
31 int CountMatches(const std::string& s, const std::string& p) { 35 int CountMatches(const std::string& s, const std::string& p) {
32 // return len(re.findall(p, s)) 36 // return len(re.findall(p, s))
33 re2::StringPiece sp(s); 37 re2::StringPiece sp(s);
34 re2::RE2 regexp(p); 38 re2::RE2 regexp(p);
35 int count = 0; 39 int count = 0;
36 while (re2::RE2::FindAndConsume(&sp, regexp)) 40 while (re2::RE2::FindAndConsume(&sp, regexp))
37 count++; 41 count++;
(...skipping 198 matching lines...) Expand 10 before | Expand all | Expand 10 after
236 features.push_back(mozScore); 240 features.push_back(mozScore);
237 // 'mozScoreAllSqrt' 241 // 'mozScoreAllSqrt'
238 features.push_back(mozScoreAllSqrt); 242 features.push_back(mozScoreAllSqrt);
239 // 'mozScoreAllLinear' 243 // 'mozScoreAllLinear'
240 features.push_back(mozScoreAllLinear); 244 features.push_back(mozScoreAllLinear);
241 245
242 return features; 246 return features;
243 } 247 }
244 248
245 } // namespace dom_distiller 249 } // namespace dom_distiller
OLD | NEW
« no previous file with comments | « no previous file | components/dom_distiller/core/page_features_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698