Index: third_party/WebKit/Source/modules/copyless_paste/CopylessPasteExtractor.cpp |
diff --git a/third_party/WebKit/Source/modules/copyless_paste/CopylessPasteExtractor.cpp b/third_party/WebKit/Source/modules/copyless_paste/CopylessPasteExtractor.cpp |
new file mode 100644 |
index 0000000000000000000000000000000000000000..b1b22782422c572f4159afdeca6fb8aac176333b |
--- /dev/null |
+++ b/third_party/WebKit/Source/modules/copyless_paste/CopylessPasteExtractor.cpp |
@@ -0,0 +1,75 @@ |
+// Copyright 2017 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "modules/copyless_paste/CopylessPasteExtractor.h" |
+ |
+#include "core/HTMLNames.h" |
+#include "core/dom/Document.h" |
+#include "core/dom/ElementTraversal.h" |
+#include "core/frame/LocalFrame.h" |
+#include "core/html/HTMLElement.h" |
+#include "platform/Histogram.h" |
+#include "platform/instrumentation/tracing/TraceEvent.h" |
+#include "wtf/text/StringBuilder.h" |
+ |
+namespace blink { |
+ |
+namespace { |
+ |
+// Builds a bracketed, comma-separated list of the text content of every |
+// descendant of `root` that is a <script> element whose type attribute equals |
+// "application/ld+json". |
+// |
+// The script contents are appended verbatim: nothing here parses or validates |
+// them, so the result is well-formed JSON only if each script body is. Returns |
+// "[]" when no matching element is found. |
+String extractMetadata(Element& root) { |
+  StringBuilder result; |
+  result.append("["); |
+  bool needsSeparator = false; |
+  for (Element& element : ElementTraversal::descendantsOf(root)) { |
+    if (element.hasTagName(HTMLNames::scriptTag) && |
esprehn
2017/02/15 00:10:03
no need to check hasTagName twice right?
wychen
2017/02/15 01:09:33
Oops.
|
+        element.getAttribute(HTMLNames::typeAttr) == "application/ld+json") { |
+      // Separate entries with a comma, but never emit a leading one. |
+      if (needsSeparator) { |
+        result.append(","); |
+      } |
+      result.append(element.textContent()); |
+      needsSeparator = true; |
+    } |
+  } |
+  result.append("]"); |
+  return result.toString(); |
+} |
+ |
+} // namespace |
+ |
+// Do JSON-LD extraction, which is equivalent to the following JavaScript code: |
esprehn
2017/02/15 00:10:03
This belongs in the header file
wychen
2017/02/15 01:09:33
Deleted.
|
+// |
+// var a=document.querySelectorAll("script[type=\"application/ld+json\"]"); |
+// if(a) { |
+// var b=[]; |
+// a.forEach(function(s) {b.push(JSON.parse(s.innerHTML)) }); |
+// b; |
+// } |
+String CopylessPasteExtractor::extract(Document& document) { |
+  TRACE_EVENT0("blink", "CopylessPasteExtractor::extract"); |
+ |
+  // Documents without a frame, or attached to a non-main frame, yield nothing. |
+  LocalFrame* frame = document.frame(); |
+  if (!frame || !frame->isMainFrame()) |
+    return emptyString; |
+ |
+  DCHECK(document.hasFinishedParsing()); |
+ |
+  // Without a document element there is no tree to walk. |
+  Element* rootElement = document.documentElement(); |
+  if (!rootElement) |
+    return emptyString; |
+ |
+  // Time only the DOM walk itself. |
+  const double walkStart = monotonicallyIncreasingTime(); |
+  String metadata = extractMetadata(*rootElement); |
+  const double walkDuration = monotonicallyIncreasingTime() - walkStart; |
+ |
+  // The duration is scaled by 1e6 before being recorded in the |
+  // "CopylessPaste.ExtractionUs" histogram. |
+  DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, |
+                      ("CopylessPaste.ExtractionUs", 1, 1000000, 50)); |
+  const int scaledDuration = static_cast<int>(1e6 * walkDuration); |
+  extractionHistogram.count(scaledDuration); |
+  return metadata; |
+} |
+ |
+} // namespace blink |