Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(805)

Side by Side Diff: third_party/WebKit/Source/modules/copyless_paste/CopylessPasteExtractor.cpp

Issue 2690903005: Metadata extraction for Copyless Paste (Closed)
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "modules/copyless_paste/CopylessPasteExtractor.h"
6
7 #include "core/HTMLNames.h"
8 #include "core/dom/Document.h"
9 #include "core/dom/ElementTraversal.h"
10 #include "core/frame/LocalFrame.h"
11 #include "core/html/HTMLElement.h"
12 #include "platform/Histogram.h"
13 #include "platform/instrumentation/tracing/TraceEvent.h"
14 #include "wtf/text/StringBuilder.h"
15
16 namespace blink {
17
18 namespace {
19
20 String extractMetadata(Element& root) {
21 StringBuilder result;
22 result.append("[");
23 bool multiple = false;
24 for (Element& element : ElementTraversal::descendantsOf(root)) {
25 if (!element.hasTagName(HTMLNames::scriptTag))
esprehn 2017/02/13 23:47:04 This doesn't understand shadow DOM, also this is w
wychen 2017/02/14 00:26:08 Not understanding shadow DOM should be fine, since
26 continue;
27 if (element.hasTagName(HTMLNames::scriptTag) &&
28 element.hasAttribute(HTMLNames::typeAttr) &&
esprehn 2017/02/13 23:47:04 no need to check hasAttribute, this is just doing
wychen 2017/02/14 02:24:16 Done.
29 element.getAttribute(HTMLNames::typeAttr) == "application/ld+json") {
30 if (multiple) {
31 result.append(",");
32 }
33 result.append(element.textContent(false));
esprehn 2017/02/13 23:47:04 false is the default, remove this
wychen 2017/02/14 02:24:16 Done.
34 multiple = true;
35 }
36 }
37 result.append("]");
38 return result.toString();
39 }
40
41 } // namespace
42
43 // Do JSON-LD extraction, which is equivalent to the following JavaScript code:
44 //
45 // var a=document.querySelectorAll("script[type=\"application/ld+json\"]");
esprehn 2017/02/13 23:47:04 Can we store these in a map instead of scanning th
wychen 2017/02/14 00:26:08 The JSON-LD data extracted here would be transient
46 // if(a) {
47 // var b=[];
48 // a.forEach(function(s) {b.push(JSON.parse(s.innerHTML)) });
49 // b;
50 // }
51 String CopylessPasteExtractor::extract(Document& document) {
52 TRACE_EVENT0("blink", "CopylessPasteExtractor::extract");
53
54 if (!document.frame() || !document.frame()->isMainFrame())
55 return "";
56
57 DCHECK(document.hasFinishedParsing());
58
59 Element* html = document.documentElement();
60 if (!html)
61 return "";
esprehn 2017/02/13 23:47:04 emptyString
wychen 2017/02/14 02:24:16 Done.
62
63 double startTime = monotonicallyIncreasingTime();
64
65 // Traverse the DOM tree and extract the metadata.
66 String result = extractMetadata(*html);
67
68 double elapsedTime = monotonicallyIncreasingTime() - startTime;
69
70 DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram,
71 ("CopylessPaste.ExtractionUs", 1, 1000000, 50));
72 extractionHistogram.count(static_cast<int>(1e6 * elapsedTime));
73 return result;
74 }
75
76 } // namespace blink
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698