Index: extract_features.js |
diff --git a/extract_features.js b/extract_features.js |
new file mode 100644 |
index 0000000000000000000000000000000000000000..f6996dc594141fada6b725f0cdcc7e6c681f5201 |
--- /dev/null |
+++ b/extract_features.js |
@@ -0,0 +1,24 @@ |
+return (function() { |
+ function hasOGArticle() { |
+ var elems = document.head.querySelectorAll( |
+ 'meta[property="og:type"],meta[name="og:type"]'); |
+ for (var i in elems) { |
+ if (elems[i].content && elems[i].content.toUpperCase() == 'ARTICLE') { |
+ return true; |
+ } |
+ } |
+ return false; |
+ } |
+ |
+ var body = document.body; |
+ return { |
+ 'opengraph': hasOGArticle(), |
+ 'url': document.location.href, |
+ 'numElements': body.querySelectorAll('*').length, |
+ 'numAnchors': body.querySelectorAll('a').length, |
+ 'numForms': body.querySelectorAll('form').length, |
+ 'innerText': body.innerText, |
+ 'textContent': body.textContent, |
+ 'innerHTML': body.innerHTML, |
+ } |
+})() |