Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(100)

Unified Diff: extract_features.js

Issue 1289123002: Merge branch 'master' into heuristics Base URL: git@github.com:chromium/dom-distiller.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « calculate_derived_features.py ('k') | foo/index » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: extract_features.js
diff --git a/extract_features.js b/extract_features.js
new file mode 100644
index 0000000000000000000000000000000000000000..f6996dc594141fada6b725f0cdcc7e6c681f5201
--- /dev/null
+++ b/extract_features.js
@@ -0,0 +1,24 @@
+return (function() {
+ function hasOGArticle() {
+ var elems = document.head.querySelectorAll(
+ 'meta[property="og:type"],meta[name="og:type"]');
+ for (var i in elems) {
+ if (elems[i].content && elems[i].content.toUpperCase() == 'ARTICLE') {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ var body = document.body;
+ return {
+ 'opengraph': hasOGArticle(),
+ 'url': document.location.href,
+ 'numElements': body.querySelectorAll('*').length,
+ 'numAnchors': body.querySelectorAll('a').length,
+ 'numForms': body.querySelectorAll('form').length,
+ 'innerText': body.innerText,
+ 'textContent': body.textContent,
+ 'innerHTML': body.innerHTML,
+ }
+})()
« no previous file with comments | « calculate_derived_features.py ('k') | foo/index » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698