Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "modules/copyless_paste/CopylessPasteExtractor.h" | |
| 6 | |
| 7 #include "core/HTMLNames.h" | |
| 8 #include "core/dom/Document.h" | |
| 9 #include "core/dom/ElementTraversal.h" | |
| 10 #include "core/frame/LocalFrame.h" | |
| 11 #include "core/html/HTMLElement.h" | |
| 12 #include "platform/Histogram.h" | |
| 13 #include "platform/instrumentation/tracing/TraceEvent.h" | |
| 14 #include "wtf/text/StringBuilder.h" | |
| 15 | |
| 16 namespace blink { | |
| 17 | |
| 18 namespace { | |
| 19 | |
| 20 String extractMetadata(Element& root) { | |
| 21 StringBuilder result; | |
| 22 result.append("["); | |
| 23 bool multiple = false; | |
| 24 for (Element& element : ElementTraversal::descendantsOf(root)) { | |
| 25 if (!element.hasTagName(HTMLNames::scriptTag)) | |
|
esprehn
2017/02/13 23:47:04
This doesn't understand shadow DOM, also this is w
wychen
2017/02/14 00:26:08
Not understanding shadow DOM should be fine, since
| |
| 26 continue; | |
| 27 if (element.hasTagName(HTMLNames::scriptTag) && | |
| 28 element.hasAttribute(HTMLNames::typeAttr) && | |
|
esprehn
2017/02/13 23:47:04
no need to check hasAttribute, this is just doing
wychen
2017/02/14 02:24:16
Done.
| |
| 29 element.getAttribute(HTMLNames::typeAttr) == "application/ld+json") { | |
| 30 if (multiple) { | |
| 31 result.append(","); | |
| 32 } | |
| 33 result.append(element.textContent(false)); | |
|
esprehn
2017/02/13 23:47:04
false is the default, remove this
wychen
2017/02/14 02:24:16
Done.
| |
| 34 multiple = true; | |
| 35 } | |
| 36 } | |
| 37 result.append("]"); | |
| 38 return result.toString(); | |
| 39 } | |
| 40 | |
| 41 } // namespace | |
| 42 | |
| 43 // Do JSON-LD extraction, which is equivalent to the following JavaScript code: | |
| 44 // | |
| 45 // var a=document.querySelectorAll("script[type=\"application/ld+json\"]"); | |
|
esprehn
2017/02/13 23:47:04
Can we store these in a map instead of scanning th
wychen
2017/02/14 00:26:08
The JSON-LD data extracted here would be transient
| |
| 46 // if(a) { | |
| 47 // var b=[]; | |
| 48 // a.forEach(function(s) {b.push(JSON.parse(s.innerHTML)) }); | |
| 49 // b; | |
| 50 // } | |
| 51 String CopylessPasteExtractor::extract(Document& document) { | |
| 52 TRACE_EVENT0("blink", "CopylessPasteExtractor::extract"); | |
| 53 | |
| 54 if (!document.frame() || !document.frame()->isMainFrame()) | |
| 55 return ""; | |
| 56 | |
| 57 DCHECK(document.hasFinishedParsing()); | |
| 58 | |
| 59 Element* html = document.documentElement(); | |
| 60 if (!html) | |
| 61 return ""; | |
|
esprehn
2017/02/13 23:47:04
emptyString
wychen
2017/02/14 02:24:16
Done.
| |
| 62 | |
| 63 double startTime = monotonicallyIncreasingTime(); | |
| 64 | |
| 65 // Traverse the DOM tree and extract the metadata. | |
| 66 String result = extractMetadata(*html); | |
| 67 | |
| 68 double elapsedTime = monotonicallyIncreasingTime() - startTime; | |
| 69 | |
| 70 DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, | |
| 71 ("CopylessPaste.ExtractionUs", 1, 1000000, 50)); | |
| 72 extractionHistogram.count(static_cast<int>(1e6 * elapsedTime)); | |
| 73 return result; | |
| 74 } | |
| 75 | |
| 76 } // namespace blink | |
| OLD | NEW |