Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "modules/copyless_paste/CopylessPasteExtractor.h" | |
| 6 | |
| 7 #include "core/HTMLNames.h" | |
| 8 #include "core/dom/Document.h" | |
| 9 #include "core/dom/ElementTraversal.h" | |
| 10 #include "core/frame/LocalFrame.h" | |
| 11 #include "core/html/HTMLElement.h" | |
| 12 #include "platform/Histogram.h" | |
| 13 #include "platform/instrumentation/tracing/TraceEvent.h" | |
| 14 #include "wtf/text/StringBuilder.h" | |
| 15 | |
| 16 namespace blink { | |
| 17 | |
| 18 namespace { | |
| 19 | |
| 20 String extractMetadata(Element& root) { | |
| 21 StringBuilder result; | |
| 22 result.append("["); | |
| 23 bool multiple = false; | |
| 24 for (Element& element : ElementTraversal::descendantsOf(root)) { | |
| 25 if (!element.hasTagName(HTMLNames::scriptTag)) | |
| 26 continue; | |
| 27 if (element.hasTagName(HTMLNames::scriptTag) && | |
|
esprehn
2017/02/15 00:10:03
no need to check hasTagName twice right?
wychen
2017/02/15 01:09:33
Oops.
| |
| 28 element.getAttribute(HTMLNames::typeAttr) == "application/ld+json") { | |
| 29 if (multiple) { | |
| 30 result.append(","); | |
| 31 } | |
| 32 result.append(element.textContent()); | |
| 33 multiple = true; | |
| 34 } | |
| 35 } | |
| 36 result.append("]"); | |
| 37 return result.toString(); | |
| 38 } | |
| 39 | |
| 40 } // namespace | |
| 41 | |
| 42 // Do JSON-LD extraction, which is equivalent to the following JavaScript code: | |
|
esprehn
2017/02/15 00:10:03
This belongs in the header file
wychen
2017/02/15 01:09:33
Deleted.
| |
| 43 // | |
| 44 // var a=document.querySelectorAll("script[type=\"application/ld+json\"]"); | |
| 45 // if(a) { | |
| 46 // var b=[]; | |
| 47 // a.forEach(function(s) {b.push(JSON.parse(s.innerHTML)) }); | |
| 48 // b; | |
| 49 // } | |
| 50 String CopylessPasteExtractor::extract(Document& document) { | |
| 51 TRACE_EVENT0("blink", "CopylessPasteExtractor::extract"); | |
| 52 | |
| 53 if (!document.frame() || !document.frame()->isMainFrame()) | |
| 54 return emptyString; | |
| 55 | |
| 56 DCHECK(document.hasFinishedParsing()); | |
| 57 | |
| 58 Element* html = document.documentElement(); | |
| 59 if (!html) | |
| 60 return emptyString; | |
| 61 | |
| 62 double startTime = monotonicallyIncreasingTime(); | |
| 63 | |
| 64 // Traverse the DOM tree and extract the metadata. | |
| 65 String result = extractMetadata(*html); | |
| 66 | |
| 67 double elapsedTime = monotonicallyIncreasingTime() - startTime; | |
| 68 | |
| 69 DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, | |
| 70 ("CopylessPaste.ExtractionUs", 1, 1000000, 50)); | |
| 71 extractionHistogram.count(static_cast<int>(1e6 * elapsedTime)); | |
| 72 return result; | |
| 73 } | |
| 74 | |
| 75 } // namespace blink | |
| OLD | NEW |