Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "modules/document_metadata/CopylessPasteExtractor.h" | 5 #include "modules/document_metadata/CopylessPasteExtractor.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <memory> | 8 #include <memory> |
| 9 #include <utility> | 9 #include <utility> |
| 10 | 10 |
| (...skipping 229 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 240 | 240 |
| 241 void extractEntityFromTopLevelObject(const JSONObject& val, | 241 void extractEntityFromTopLevelObject(const JSONObject& val, |
| 242 Vector<EntityPtr>& entities) { | 242 Vector<EntityPtr>& entities) { |
| 243 const JSONArray* graph = val.GetArray(kJSONLDKeyGraph); | 243 const JSONArray* graph = val.GetArray(kJSONLDKeyGraph); |
| 244 if (graph) { | 244 if (graph) { |
| 245 extractEntitiesFromArray(*graph, entities); | 245 extractEntitiesFromArray(*graph, entities); |
| 246 } | 246 } |
| 247 extractTopLevelEntity(val, entities); | 247 extractTopLevelEntity(val, entities); |
| 248 } | 248 } |
| 249 | 249 |
| 250 bool extractMetadata(const Element& root, Vector<EntityPtr>& entities) { | 250 // kCount must be the last entry. |
| 251 enum ExtractionStatus { kOK, kEmpty, kParseFailure, kWrongType, kCount }; | |
| 252 | |
| 253 ExtractionStatus extractMetadata(const Element& root, | |
| 254 Vector<EntityPtr>& entities) { | |
| 251 for (Element& element : ElementTraversal::DescendantsOf(root)) { | 255 for (Element& element : ElementTraversal::DescendantsOf(root)) { |
| 252 if (element.HasTagName(HTMLNames::scriptTag) && | 256 if (element.HasTagName(HTMLNames::scriptTag) && |
| 253 element.getAttribute(HTMLNames::typeAttr) == "application/ld+json") { | 257 element.getAttribute(HTMLNames::typeAttr) == "application/ld+json") { |
| 254 std::unique_ptr<JSONValue> json = ParseJSON(element.textContent()); | 258 std::unique_ptr<JSONValue> json = ParseJSON(element.textContent()); |
| 255 if (!json) { | 259 if (!json) { |
| 256 LOG(ERROR) << "Failed to parse json."; | 260 LOG(ERROR) << "Failed to parse json."; |
| 257 return false; | 261 return kParseFailure; |
| 258 } | 262 } |
| 259 switch (json->GetType()) { | 263 switch (json->GetType()) { |
| 260 case JSONValue::ValueType::kTypeArray: | 264 case JSONValue::ValueType::kTypeArray: |
| 261 extractEntitiesFromArray(*(JSONArray::Cast(json.get())), entities); | 265 extractEntitiesFromArray(*(JSONArray::Cast(json.get())), entities); |
| 262 break; | 266 break; |
| 263 case JSONValue::ValueType::kTypeObject: | 267 case JSONValue::ValueType::kTypeObject: |
| 264 extractEntityFromTopLevelObject(*(JSONObject::Cast(json.get())), | 268 extractEntityFromTopLevelObject(*(JSONObject::Cast(json.get())), |
| 265 entities); | 269 entities); |
| 266 break; | 270 break; |
| 267 default: | 271 default: |
| 268 return false; | 272 return kWrongType; |
| 269 } | 273 } |
| 270 } | 274 } |
| 271 } | 275 } |
| 272 return !entities.IsEmpty(); | 276 if (entities.IsEmpty()) { |
| 277 return kEmpty; | |
| 278 } | |
| 279 return kOK; | |
| 273 } | 280 } |
| 274 | 281 |
| 275 } // namespace | 282 } // namespace |
| 276 | 283 |
| 277 WebPagePtr CopylessPasteExtractor::extract(const Document& document) { | 284 WebPagePtr CopylessPasteExtractor::extract(const Document& document) { |
| 278 TRACE_EVENT0("blink", "CopylessPasteExtractor::extract"); | 285 TRACE_EVENT0("blink", "CopylessPasteExtractor::extract"); |
| 279 | 286 |
| 280 if (!document.GetFrame() || !document.GetFrame()->IsMainFrame()) | 287 if (!document.GetFrame() || !document.GetFrame()->IsMainFrame()) |
| 281 return nullptr; | 288 return nullptr; |
| 282 | 289 |
| 283 Element* html = document.documentElement(); | 290 Element* html = document.documentElement(); |
| 284 if (!html) | 291 if (!html) |
| 285 return nullptr; | 292 return nullptr; |
| 286 | 293 |
| 287 double start_time = MonotonicallyIncreasingTime(); | |
| 288 | |
| 289 WebPagePtr page = WebPage::New(); | 294 WebPagePtr page = WebPage::New(); |
| 290 | 295 |
| 291 // Traverse the DOM tree and extract the metadata. | 296 // Traverse the DOM tree and extract the metadata. |
| 292 if (!extractMetadata(*html, page->entities)) | 297 double start_time = MonotonicallyIncreasingTime(); |
| 298 ExtractionStatus status = extractMetadata(*html, page->entities); | |
| 299 double elapsed_time = MonotonicallyIncreasingTime() - start_time; | |
| 300 | |
| 301 DEFINE_STATIC_LOCAL(EnumerationHistogram, status_histogram, | |
| 302 ("CopylessPaste.ExtractionStatus", kCount)); | |
| 303 status_histogram.Count(status); | |
| 304 | |
| 305 // Couldn't use SCOPED_BLINK_UMA_HISTOGRAM_TIMER() due to dynamic naming. | |
|
haraken (2017/04/12 02:10:22): Drop this comment.
wychen (2017/04/12 02:18:12): Done.
| |
| 306 if (status != kOK) { | |
| 307 DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, | |
| 308 ("CopylessPaste.ExtractionFailedUs", 1, 1000000, 50)); | |
| 309 extractionHistogram.Count(1e6 * elapsed_time); | |
|
haraken (2017/04/12 02:10:22): elapsed_time is in microseconds. So this should be [...] (comment truncated in the review page)
wychen (2017/04/12 02:18:11): MonotonicallyIncreasingTime() is in seconds.
| |
| 293 return nullptr; | 310 return nullptr; |
| 311 } | |
| 312 DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, | |
| 313 ("CopylessPaste.ExtractionUs", 1, 1000000, 50)); | |
| 314 extractionHistogram.Count(1e6 * elapsed_time); | |
| 315 | |
| 294 page->url = document.Url(); | 316 page->url = document.Url(); |
| 295 page->title = document.title(); | 317 page->title = document.title(); |
| 296 | |
| 297 double elapsed_time = MonotonicallyIncreasingTime() - start_time; | |
| 298 | |
| 299 DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, | |
| 300 ("CopylessPaste.ExtractionUs", 1, 1000000, 50)); | |
| 301 extractionHistogram.Count(static_cast<int>(1e6 * elapsed_time)); | |
| 302 return page; | 318 return page; |
| 303 } | 319 } |
| 304 | 320 |
| 305 } // namespace blink | 321 } // namespace blink |
| OLD | NEW |