OLD | NEW |
1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "modules/document_metadata/CopylessPasteExtractor.h" | 5 #include "modules/document_metadata/CopylessPasteExtractor.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <memory> | 8 #include <memory> |
9 #include <utility> | 9 #include <utility> |
10 | 10 |
(...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
240 | 240 |
241 void extractEntityFromTopLevelObject(const JSONObject& val, | 241 void extractEntityFromTopLevelObject(const JSONObject& val, |
242 Vector<EntityPtr>& entities) { | 242 Vector<EntityPtr>& entities) { |
243 const JSONArray* graph = val.GetArray(kJSONLDKeyGraph); | 243 const JSONArray* graph = val.GetArray(kJSONLDKeyGraph); |
244 if (graph) { | 244 if (graph) { |
245 extractEntitiesFromArray(*graph, entities); | 245 extractEntitiesFromArray(*graph, entities); |
246 } | 246 } |
247 extractTopLevelEntity(val, entities); | 247 extractTopLevelEntity(val, entities); |
248 } | 248 } |
249 | 249 |
250 bool extractMetadata(const Element& root, Vector<EntityPtr>& entities) { | 250 // ExtractionStatus is used in UMA, hence is append-only. |
| 251 // kCount must be the last entry. |
| 252 enum ExtractionStatus { kOK, kEmpty, kParseFailure, kWrongType, kCount }; |
| 253 |
| 254 ExtractionStatus extractMetadata(const Element& root, |
| 255 Vector<EntityPtr>& entities) { |
251 for (Element& element : ElementTraversal::DescendantsOf(root)) { | 256 for (Element& element : ElementTraversal::DescendantsOf(root)) { |
252 if (element.HasTagName(HTMLNames::scriptTag) && | 257 if (element.HasTagName(HTMLNames::scriptTag) && |
253 element.getAttribute(HTMLNames::typeAttr) == "application/ld+json") { | 258 element.getAttribute(HTMLNames::typeAttr) == "application/ld+json") { |
254 std::unique_ptr<JSONValue> json = ParseJSON(element.textContent()); | 259 std::unique_ptr<JSONValue> json = ParseJSON(element.textContent()); |
255 if (!json) { | 260 if (!json) { |
256 LOG(ERROR) << "Failed to parse json."; | 261 LOG(ERROR) << "Failed to parse json."; |
257 return false; | 262 return kParseFailure; |
258 } | 263 } |
259 switch (json->GetType()) { | 264 switch (json->GetType()) { |
260 case JSONValue::ValueType::kTypeArray: | 265 case JSONValue::ValueType::kTypeArray: |
261 extractEntitiesFromArray(*(JSONArray::Cast(json.get())), entities); | 266 extractEntitiesFromArray(*(JSONArray::Cast(json.get())), entities); |
262 break; | 267 break; |
263 case JSONValue::ValueType::kTypeObject: | 268 case JSONValue::ValueType::kTypeObject: |
264 extractEntityFromTopLevelObject(*(JSONObject::Cast(json.get())), | 269 extractEntityFromTopLevelObject(*(JSONObject::Cast(json.get())), |
265 entities); | 270 entities); |
266 break; | 271 break; |
267 default: | 272 default: |
268 return false; | 273 return kWrongType; |
269 } | 274 } |
270 } | 275 } |
271 } | 276 } |
272 return !entities.IsEmpty(); | 277 if (entities.IsEmpty()) { |
| 278 return kEmpty; |
| 279 } |
| 280 return kOK; |
273 } | 281 } |
274 | 282 |
275 } // namespace | 283 } // namespace |
276 | 284 |
277 WebPagePtr CopylessPasteExtractor::extract(const Document& document) { | 285 WebPagePtr CopylessPasteExtractor::extract(const Document& document) { |
278 TRACE_EVENT0("blink", "CopylessPasteExtractor::extract"); | 286 TRACE_EVENT0("blink", "CopylessPasteExtractor::extract"); |
279 | 287 |
280 if (!document.GetFrame() || !document.GetFrame()->IsMainFrame()) | 288 if (!document.GetFrame() || !document.GetFrame()->IsMainFrame()) |
281 return nullptr; | 289 return nullptr; |
282 | 290 |
283 Element* html = document.documentElement(); | 291 Element* html = document.documentElement(); |
284 if (!html) | 292 if (!html) |
285 return nullptr; | 293 return nullptr; |
286 | 294 |
287 double start_time = MonotonicallyIncreasingTime(); | |
288 | |
289 WebPagePtr page = WebPage::New(); | 295 WebPagePtr page = WebPage::New(); |
290 | 296 |
291 // Traverse the DOM tree and extract the metadata. | 297 // Traverse the DOM tree and extract the metadata. |
292 if (!extractMetadata(*html, page->entities)) | 298 double start_time = MonotonicallyIncreasingTime(); |
| 299 ExtractionStatus status = extractMetadata(*html, page->entities); |
| 300 double elapsed_time = MonotonicallyIncreasingTime() - start_time; |
| 301 |
| 302 DEFINE_STATIC_LOCAL(EnumerationHistogram, status_histogram, |
| 303 ("CopylessPaste.ExtractionStatus", kCount)); |
| 304 status_histogram.Count(status); |
| 305 |
| 306 if (status != kOK) { |
| 307 DEFINE_STATIC_LOCAL( |
| 308 CustomCountHistogram, extractionHistogram, |
| 309 ("CopylessPaste.ExtractionFailedUs", 1, 1000 * 1000, 50)); |
| 310 extractionHistogram.Count(1e6 * elapsed_time); |
293 return nullptr; | 311 return nullptr; |
| 312 } |
| 313 DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, |
| 314 ("CopylessPaste.ExtractionUs", 1, 1000 * 1000, 50)); |
| 315 extractionHistogram.Count(1e6 * elapsed_time); |
| 316 |
294 page->url = document.Url(); | 317 page->url = document.Url(); |
295 page->title = document.title(); | 318 page->title = document.title(); |
296 | |
297 double elapsed_time = MonotonicallyIncreasingTime() - start_time; | |
298 | |
299 DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, | |
300 ("CopylessPaste.ExtractionUs", 1, 1000000, 50)); | |
301 extractionHistogram.Count(static_cast<int>(1e6 * elapsed_time)); | |
302 return page; | 319 return page; |
303 } | 320 } |
304 | 321 |
305 } // namespace blink | 322 } // namespace blink |
OLD | NEW |