Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(202)

Unified Diff: components/dom_distiller/core/javascript/extract_features.js

Issue 1047223003: Add integration of the new heuristics (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@dd-adaboost-model
Patch Set: whitelist resources for ios Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/dom_distiller/core/javascript/extract_features.js
diff --git a/components/dom_distiller/core/javascript/extract_features.js b/components/dom_distiller/core/javascript/extract_features.js
new file mode 100644
index 0000000000000000000000000000000000000000..031254c436c11a88e38732ae839529a759123957
--- /dev/null
+++ b/components/dom_distiller/core/javascript/extract_features.js
@@ -0,0 +1,28 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+(function() {
+ function hasOGArticle() {
+ var elems = document.head.querySelectorAll(
+ 'meta[property="og:type"],meta[name="og:type"]');
+ for (var i in elems) {
+ if (elems[i].content && elems[i].content.toUpperCase() == 'ARTICLE') {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ var body = document.body;
+ return JSON.stringify({
+ 'opengraph': hasOGArticle(),
+ 'url': document.location.href,
+ 'numElements': body.querySelectorAll('*').length,
+ 'numAnchors': body.querySelectorAll('a').length,
+ 'numForms': body.querySelectorAll('form').length,
+ 'innerText': body.innerText,
+ 'textContent': body.textContent,
+ 'innerHTML': body.innerHTML,
+ });
+})()
« no previous file with comments | « components/dom_distiller/core/distillable_page_detector_unittest.cc ('k') | components/dom_distiller/core/page_features.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698