Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(464)

Unified Diff: heuristics/distillable/extract_features.js

Issue 1808503002: Update distillability modeling scripts to predict long articles (Closed) Base URL: git@github.com:chromium/dom-distiller.git@ml-visible
Patch Set: update docs Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « heuristics/distillable/check_distilled_mhtml.py ('k') | heuristics/distillable/get_screenshots.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: heuristics/distillable/extract_features.js
diff --git a/heuristics/distillable/extract_features.js b/heuristics/distillable/extract_features.js
index 068c71e1bb61e921cd0c98861c48442936401398..2de9dd737b3c9a63c626d819d3587a4393170071 100644
--- a/heuristics/distillable/extract_features.js
+++ b/heuristics/distillable/extract_features.js
@@ -73,7 +73,7 @@ return (function() {
}
var body = document.body;
- return {
+ var features = {
'opengraph': hasOGArticle(),
'url': document.location.href,
'title': document.title,
@@ -86,13 +86,11 @@ return (function() {
'innerText': body.innerText,
'textContent': body.textContent,
'innerHTML': body.innerHTML,
- 'mozScore': _mozScore(true, 0.5, 140, true, 1e100),
- 'mozScoreAllSqrt': _mozScore(true, 0.5, 0, true, 1e100),
- 'mozScoreAllLinear': _mozScore(true, 1, 0, true, 1e100),
- 'mozScoreFast': _mozScore(false, 0.5, 140, true, 1000),
- 'mozScoreFastAllSqrt': _mozScore(false, 0.5, 0, true, 1000),
- 'mozScoreFastAllLinear': _mozScore(false, 1, 0, true, 1000),
+ 'mozScore': Math.min(6 * Math.sqrt(1000 - 140), _mozScore(false, 0.5, 140, true, 1000)),
+ 'mozScoreAllSqrt': Math.min(6 * Math.sqrt(1000), _mozScore(false, 0.5, 0, true, 1000)),
+ 'mozScoreAllLinear': Math.min(6 * 1000, _mozScore(false, 1, 0, true, 1000)),
'visibleElements': countVisible(body.querySelectorAll('*')),
'visiblePPRE': countVisible(body.querySelectorAll('p,pre')),
}
+ return features;
})()
« no previous file with comments | « heuristics/distillable/check_distilled_mhtml.py ('k') | heuristics/distillable/get_screenshots.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698