OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2017 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "modules/copyless_paste/CopylessPasteExtractor.h" | |
6 | |
7 #include "core/HTMLNames.h" | |
8 #include "core/dom/Document.h" | |
9 #include "core/dom/ElementTraversal.h" | |
10 #include "core/frame/LocalFrame.h" | |
11 #include "core/html/HTMLElement.h" | |
12 #include "platform/Histogram.h" | |
13 #include "platform/instrumentation/tracing/TraceEvent.h" | |
14 #include "wtf/text/StringBuilder.h" | |
15 | |
16 namespace blink { | |
17 | |
18 namespace { | |
19 | |
// Builds a "[...]"-wrapped, comma-separated string from the text content of | |
// every <script> element under root whose type attribute equals | |
// "application/ld+json". The script text is appended as-is; nothing in this | |
// function parses or validates it. Returns "[]" when no element matches. | |
20 String extractMetadata(Element& root) { | |
21 StringBuilder result; | |
22 result.append("["); | |
// Set to true once one entry has been appended, so that every later entry | |
// is preceded by a "," separator. | |
23 bool multiple = false; | |
24 for (Element& element : ElementTraversal::descendantsOf(root)) { | |
// Skip every descendant that is not a <script> element. | |
25 if (!element.hasTagName(HTMLNames::scriptTag)) | |
esprehn
2017/02/13 23:47:04
This doesn't understand shadow DOM, also this is w
wychen
2017/02/14 00:26:08
Not understanding shadow DOM should be fine, since
| |
26 continue; | |
// The hasTagName() test below always holds at this point because of the | |
// continue above; the type attribute comparison is what selects the element. | |
27 if (element.hasTagName(HTMLNames::scriptTag) && | |
28 element.hasAttribute(HTMLNames::typeAttr) && | |
esprehn
2017/02/13 23:47:04
no need to check hasAttribute, this is just doing
wychen
2017/02/14 02:24:16
Done.
| |
29 element.getAttribute(HTMLNames::typeAttr) == "application/ld+json") { | |
30 if (multiple) { | |
31 result.append(","); | |
32 } | |
33 result.append(element.textContent(false)); | |
esprehn
2017/02/13 23:47:04
false is the default, remove this
wychen
2017/02/14 02:24:16
Done.
| |
34 multiple = true; | |
35 } | |
36 } | |
37 result.append("]"); | |
38 return result.toString(); | |
39 } | |
40 | |
41 } // namespace | |
42 | |
43 // Do JSON-LD extraction, which is equivalent to the following JavaScript code: | |
44 // | |
45 // var a=document.querySelectorAll("script[type=\"application/ld+json\"]"); | |
esprehn
2017/02/13 23:47:04
Can we store these in a map instead of scanning th
wychen
2017/02/14 00:26:08
The JSON-LD data extracted here would be transient
| |
46 // if(a) { | |
47 // var b=[]; | |
48 // a.forEach(function(s) {b.push(JSON.parse(s.innerHTML)) }); | |
49 // b; | |
50 // } | |
// Returns the string produced by extractMetadata() for the document element | |
// of the given document, or an empty string when the document has no frame, | |
// its frame is not the main frame, or it has no document element. The time | |
// spent in extractMetadata() is recorded in a histogram. | |
51 String CopylessPasteExtractor::extract(Document& document) { | |
52 TRACE_EVENT0("blink", "CopylessPasteExtractor::extract"); | |
53 | |
// Bail out for documents that are detached or not in the main frame. | |
54 if (!document.frame() || !document.frame()->isMainFrame()) | |
55 return ""; | |
56 | |
// Callers are expected to invoke this only after parsing has finished. | |
57 DCHECK(document.hasFinishedParsing()); | |
58 | |
59 Element* html = document.documentElement(); | |
60 if (!html) | |
61 return ""; | |
esprehn
2017/02/13 23:47:04
emptyString
wychen
2017/02/14 02:24:16
Done.
| |
62 | |
// Only the extractMetadata() call is timed; the early returns above record | |
// no histogram sample. | |
63 double startTime = monotonicallyIncreasingTime(); | |
64 | |
65 // Traverse the DOM tree and extract the metadata. | |
66 String result = extractMetadata(*html); | |
67 | |
68 double elapsedTime = monotonicallyIncreasingTime() - startTime; | |
69 | |
// NOTE(review): the 1e6 factor and the "Us" suffix suggest elapsedTime is in | |
// seconds and the sample is in microseconds; confirm against | |
// monotonicallyIncreasingTime(). | |
70 DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, | |
71 ("CopylessPaste.ExtractionUs", 1, 1000000, 50)); | |
72 extractionHistogram.count(static_cast<int>(1e6 * elapsedTime)); | |
73 return result; | |
74 } | |
75 | |
76 } // namespace blink | |
OLD | NEW |