Index: third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractor.cpp |
diff --git a/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractor.cpp b/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractor.cpp |
index 2e7e51825a65afc3a65fcb75f48c750c0e42b40f..4ef9e481a7e64f5a5ad66a053b574a2c40e4a65f 100644 |
--- a/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractor.cpp |
+++ b/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractor.cpp |
@@ -247,14 +247,18 @@ void extractEntityFromTopLevelObject(const JSONObject& val, |
extractTopLevelEntity(val, entities); |
} |
-bool extractMetadata(const Element& root, Vector<EntityPtr>& entities) { |
+// kCount must be the last entry. |
Ilya Sherman
2017/04/12 23:25:55
nit: Please document that this enum is used to bac
wychen
2017/04/13 00:47:54
Done.
|
+enum ExtractionStatus { kOK, kEmpty, kParseFailure, kWrongType, kCount }; |
Ilya Sherman
2017/04/12 23:25:55
nit: Could this be an enum class?
wychen
2017/04/12 23:40:04
Great suggestion. The UMA API in base/ supports en
|
+ |
+ExtractionStatus extractMetadata(const Element& root, |
+ Vector<EntityPtr>& entities) { |
for (Element& element : ElementTraversal::DescendantsOf(root)) { |
if (element.HasTagName(HTMLNames::scriptTag) && |
element.getAttribute(HTMLNames::typeAttr) == "application/ld+json") { |
std::unique_ptr<JSONValue> json = ParseJSON(element.textContent()); |
if (!json) { |
LOG(ERROR) << "Failed to parse json."; |
- return false; |
+ return kParseFailure; |
} |
switch (json->GetType()) { |
case JSONValue::ValueType::kTypeArray: |
@@ -265,11 +269,14 @@ bool extractMetadata(const Element& root, Vector<EntityPtr>& entities) { |
entities); |
break; |
default: |
- return false; |
+ return kWrongType; |
} |
} |
} |
- return !entities.IsEmpty(); |
+ if (entities.IsEmpty()) { |
+ return kEmpty; |
+ } |
+ return kOK; |
} |
} // namespace |
@@ -284,21 +291,29 @@ WebPagePtr CopylessPasteExtractor::extract(const Document& document) { |
if (!html) |
return nullptr; |
- double start_time = MonotonicallyIncreasingTime(); |
- |
WebPagePtr page = WebPage::New(); |
// Traverse the DOM tree and extract the metadata. |
- if (!extractMetadata(*html, page->entities)) |
- return nullptr; |
- page->url = document.Url(); |
- page->title = document.title(); |
- |
+ double start_time = MonotonicallyIncreasingTime(); |
+ ExtractionStatus status = extractMetadata(*html, page->entities); |
double elapsed_time = MonotonicallyIncreasingTime() - start_time; |
+ DEFINE_STATIC_LOCAL(EnumerationHistogram, status_histogram, |
+ ("CopylessPaste.ExtractionStatus", kCount)); |
+ status_histogram.Count(status); |
+ |
+ if (status != kOK) { |
+ DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, |
+ ("CopylessPaste.ExtractionFailedUs", 1, 1000000, 50)); |
Ilya Sherman
2017/04/12 23:25:55
Optional nit: Could you please write 1000 * 1000 i
wychen
2017/04/13 00:47:54
Done.
|
+ extractionHistogram.Count(1e6 * elapsed_time); |
+ return nullptr; |
+ } |
DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, |
("CopylessPaste.ExtractionUs", 1, 1000000, 50)); |
- extractionHistogram.Count(static_cast<int>(1e6 * elapsed_time)); |
+ extractionHistogram.Count(1e6 * elapsed_time); |
+ |
+ page->url = document.Url(); |
+ page->title = document.title(); |
return page; |
} |