Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(118)

Side by Side Diff: components/dom_distiller/core/distillable_page_detector.cc

Issue 1047223003: Add integration of the new heuristics (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@dd-adaboost-model
Patch Set: Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/dom_distiller/core/distillable_page_detector.h" 5 #include "components/dom_distiller/core/distillable_page_detector.h"
6 6
7 #include "base/logging.h" 7 #include "base/logging.h"
8 #include "grit/components_resources.h"
9 #include "ui/base/resource/resource_bundle.h"
8 10
9 namespace dom_distiller { 11 namespace dom_distiller {
10 12
13 const DistillablePageDetector* DistillablePageDetector::GetDefault() {
14 static DistillablePageDetector* detector = nullptr;
15 if (!detector) {
16 std::string serialized_proto =
17 ResourceBundle::GetSharedInstance()
18 .GetRawDataResource(IDR_DISTILLABLE_PAGE_SERIALIZED_MODEL)
19 .as_string();
20 scoped_ptr<AdaBoostProto> proto(new AdaBoostProto);
21 CHECK(proto->ParseFromString(serialized_proto));
22 detector = new DistillablePageDetector(proto.Pass());
23 }
24 return detector;
25 }
26
11 DistillablePageDetector::DistillablePageDetector( 27 DistillablePageDetector::DistillablePageDetector(
12 scoped_ptr<AdaBoostProto> proto) 28 scoped_ptr<AdaBoostProto> proto)
13 : proto_(proto.Pass()), threshold_(0.0) { 29 : proto_(proto.Pass()), threshold_(0.0) {
14 CHECK(proto_->num_stumps() == proto_->stump_size()); 30 CHECK(proto_->num_stumps() == proto_->stump_size());
15 for (int i = 0; i < proto_->num_stumps(); ++i) { 31 for (int i = 0; i < proto_->num_stumps(); ++i) {
16 const StumpProto& stump = proto_->stump(i); 32 const StumpProto& stump = proto_->stump(i);
17 CHECK(stump.feature_number() < proto_->num_features()); 33 CHECK(stump.feature_number() < proto_->num_features());
18 threshold_ += stump.weight() / 2.0; 34 threshold_ += stump.weight() / 2.0;
19 } 35 }
20 } 36 }
21 37
22 DistillablePageDetector::~DistillablePageDetector() { 38 DistillablePageDetector::~DistillablePageDetector() {
23 } 39 }
24 40
25 bool DistillablePageDetector::Classify( 41 bool DistillablePageDetector::Classify(
26 const std::vector<double>& features) const { 42 const std::vector<double>& features) const {
27 return Score(features) > threshold_; 43 return Score(features) > threshold_;
28 } 44 }
29 45
30 double DistillablePageDetector::Score( 46 double DistillablePageDetector::Score(
31 const std::vector<double>& features) const { 47 const std::vector<double>& features) const {
32 CHECK(features.size() == size_t(proto_->num_features())); 48 if (features.size() != size_t(proto_->num_features())) {
nyquist 2015/04/01 18:12:20 I would be OK with moving this to your previous CL
cjhopman 2015/04/01 20:59:29 Done.
49 return 0.0;
50 }
33 double score = 0.0; 51 double score = 0.0;
34 for (int i = 0; i < proto_->num_stumps(); ++i) { 52 for (int i = 0; i < proto_->num_stumps(); ++i) {
35 const StumpProto& stump = proto_->stump(i); 53 const StumpProto& stump = proto_->stump(i);
36 if (features[stump.feature_number()] > stump.split()) { 54 if (features[stump.feature_number()] > stump.split()) {
37 score += stump.weight(); 55 score += stump.weight();
38 } 56 }
39 } 57 }
40 return score; 58 return score;
41 } 59 }
42 60
43 double DistillablePageDetector::GetThreshold() const { 61 double DistillablePageDetector::GetThreshold() const {
44 return threshold_; 62 return threshold_;
45 } 63 }
46 64
47 } // namespace dom_distiller 65 } // namespace dom_distiller
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698