Chromium Code Reviews| Index: third_party/WebKit/Source/modules/copyless_paste/CopylessPasteExtractor.cpp |
| diff --git a/third_party/WebKit/Source/modules/copyless_paste/CopylessPasteExtractor.cpp b/third_party/WebKit/Source/modules/copyless_paste/CopylessPasteExtractor.cpp |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..b1b22782422c572f4159afdeca6fb8aac176333b |
| --- /dev/null |
| +++ b/third_party/WebKit/Source/modules/copyless_paste/CopylessPasteExtractor.cpp |
| @@ -0,0 +1,75 @@ |
| +// Copyright 2017 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "modules/copyless_paste/CopylessPasteExtractor.h" |
| + |
| +#include "core/HTMLNames.h" |
| +#include "core/dom/Document.h" |
| +#include "core/dom/ElementTraversal.h" |
| +#include "core/frame/LocalFrame.h" |
| +#include "core/html/HTMLElement.h" |
| +#include "platform/Histogram.h" |
| +#include "platform/instrumentation/tracing/TraceEvent.h" |
| +#include "wtf/text/StringBuilder.h" |
| + |
| +namespace blink { |
| + |
| +namespace { |
| + |
| +// Builds a bracketed, comma-separated string from the text content of every |
| +// <script> descendant of |root| whose type attribute equals |
| +// "application/ld+json". The text of each matching element is appended |
| +// verbatim (it is not parsed or validated here). Returns "[]" when no |
| +// matching element is found. |
| +String extractMetadata(Element& root) { |
| + StringBuilder result; |
| + result.append("["); |
| + // Becomes true once the first matching script has been appended; from then |
| + // on a "," separator is emitted before each further entry. |
| + bool multiple = false; |
| + for (Element& element : ElementTraversal::descendantsOf(root)) { |
| + // Skip everything that is not a <script> element. |
| + if (!element.hasTagName(HTMLNames::scriptTag)) |
| + continue; |
| + // NOTE(review): the hasTagName() test below repeats the guard above and is |
| + // therefore always true at this point; only the type check filters. |
| + if (element.hasTagName(HTMLNames::scriptTag) && |

esprehn
2017/02/15 00:10:03
no need to check hasTagName twice right?
wychen
2017/02/15 01:09:33
Oops.
|
| + element.getAttribute(HTMLNames::typeAttr) == "application/ld+json") { |
| + if (multiple) { |
| + result.append(","); |
| + } |
| + result.append(element.textContent()); |
| + multiple = true; |
| + } |
| + } |
| + result.append("]"); |
| + return result.toString(); |
| +} |
| + |
| +} // namespace |
| + |
| +// Do JSON-LD extraction, which is equivalent to the following JavaScript code: |
|
esprehn
2017/02/15 00:10:03
This belongs in the header file
wychen
2017/02/15 01:09:33
Deleted.
|
| +// |
| +// var a=document.querySelectorAll("script[type=\"application/ld+json\"]"); |
| +// if(a) { |
| +// var b=[]; |
| +// a.forEach(function(s) {b.push(JSON.parse(s.innerHTML)) }); |
| +// b; |
| +// } |
| +String CopylessPasteExtractor::extract(Document& document) { |
| + TRACE_EVENT0("blink", "CopylessPasteExtractor::extract"); |
| + |
| + // Only a document that has a frame, and whose frame is the main frame, is |
| + // processed; anything else yields the empty string. |
| + const bool inMainFrame = document.frame() && document.frame()->isMainFrame(); |
| + if (!inMainFrame) |
| + return emptyString; |
| + |
| + DCHECK(document.hasFinishedParsing()); |
| + |
| + // Without a document element there is nothing to traverse. |
| + Element* rootElement = document.documentElement(); |
| + if (rootElement == nullptr) |
| + return emptyString; |
| + |
| + // The two clock reads bracket only the DOM walk. |
| + const double walkStart = monotonicallyIncreasingTime(); |
| + String metadata = extractMetadata(*rootElement); |
| + const double walkDuration = monotonicallyIncreasingTime() - walkStart; |
| + |
| + // The measured duration is scaled by 1e6 and truncated to int before being |
| + // recorded in the "CopylessPaste.ExtractionUs" histogram. |
| + DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, |
| + ("CopylessPaste.ExtractionUs", 1, 1000000, 50)); |
| + const int scaledDuration = static_cast<int>(1e6 * walkDuration); |
| + extractionHistogram.count(scaledDuration); |
| + return metadata; |
| +} |
| + |
| +} // namespace blink |