OLD | NEW |
---|---|
1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "modules/document_metadata/CopylessPasteExtractor.h" | 5 #include "modules/document_metadata/CopylessPasteExtractor.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <memory> | 8 #include <memory> |
9 #include <utility> | 9 #include <utility> |
10 | 10 |
(...skipping 229 matching lines...) | (...skipping 229 matching lines...) |
240 | 240 |
241 void extractEntityFromTopLevelObject(const JSONObject& val, | 241 void extractEntityFromTopLevelObject(const JSONObject& val, |
242 Vector<EntityPtr>& entities) { | 242 Vector<EntityPtr>& entities) { |
243 const JSONArray* graph = val.GetArray(kJSONLDKeyGraph); | 243 const JSONArray* graph = val.GetArray(kJSONLDKeyGraph); |
244 if (graph) { | 244 if (graph) { |
245 extractEntitiesFromArray(*graph, entities); | 245 extractEntitiesFromArray(*graph, entities); |
246 } | 246 } |
247 extractTopLevelEntity(val, entities); | 247 extractTopLevelEntity(val, entities); |
248 } | 248 } |
249 | 249 |
250 bool extractMetadata(const Element& root, Vector<EntityPtr>& entities) { | 250 // kCount must be the last entry. |
[Review comment] Ilya Sherman (2017/04/12 23:25:55): nit: Please document that this enum is used to bac… [comment truncated in this capture]
[Reply] wychen (2017/04/13 00:47:54): Done.
| |
251 enum ExtractionStatus { kOK, kEmpty, kParseFailure, kWrongType, kCount }; | |
[Review comment] Ilya Sherman (2017/04/12 23:25:55): nit: Could this be an enum class?
[Reply] wychen (2017/04/12 23:40:04): Great suggestion. The UMA API in base/ supports en… [comment truncated in this capture]
| |
252 | |
253 ExtractionStatus extractMetadata(const Element& root, | |
254 Vector<EntityPtr>& entities) { | |
251 for (Element& element : ElementTraversal::DescendantsOf(root)) { | 255 for (Element& element : ElementTraversal::DescendantsOf(root)) { |
252 if (element.HasTagName(HTMLNames::scriptTag) && | 256 if (element.HasTagName(HTMLNames::scriptTag) && |
253 element.getAttribute(HTMLNames::typeAttr) == "application/ld+json") { | 257 element.getAttribute(HTMLNames::typeAttr) == "application/ld+json") { |
254 std::unique_ptr<JSONValue> json = ParseJSON(element.textContent()); | 258 std::unique_ptr<JSONValue> json = ParseJSON(element.textContent()); |
255 if (!json) { | 259 if (!json) { |
256 LOG(ERROR) << "Failed to parse json."; | 260 LOG(ERROR) << "Failed to parse json."; |
257 return false; | 261 return kParseFailure; |
258 } | 262 } |
259 switch (json->GetType()) { | 263 switch (json->GetType()) { |
260 case JSONValue::ValueType::kTypeArray: | 264 case JSONValue::ValueType::kTypeArray: |
261 extractEntitiesFromArray(*(JSONArray::Cast(json.get())), entities); | 265 extractEntitiesFromArray(*(JSONArray::Cast(json.get())), entities); |
262 break; | 266 break; |
263 case JSONValue::ValueType::kTypeObject: | 267 case JSONValue::ValueType::kTypeObject: |
264 extractEntityFromTopLevelObject(*(JSONObject::Cast(json.get())), | 268 extractEntityFromTopLevelObject(*(JSONObject::Cast(json.get())), |
265 entities); | 269 entities); |
266 break; | 270 break; |
267 default: | 271 default: |
268 return false; | 272 return kWrongType; |
269 } | 273 } |
270 } | 274 } |
271 } | 275 } |
272 return !entities.IsEmpty(); | 276 if (entities.IsEmpty()) { |
277 return kEmpty; | |
278 } | |
279 return kOK; | |
273 } | 280 } |
274 | 281 |
275 } // namespace | 282 } // namespace |
276 | 283 |
277 WebPagePtr CopylessPasteExtractor::extract(const Document& document) { | 284 WebPagePtr CopylessPasteExtractor::extract(const Document& document) { |
278 TRACE_EVENT0("blink", "CopylessPasteExtractor::extract"); | 285 TRACE_EVENT0("blink", "CopylessPasteExtractor::extract"); |
279 | 286 |
280 if (!document.GetFrame() || !document.GetFrame()->IsMainFrame()) | 287 if (!document.GetFrame() || !document.GetFrame()->IsMainFrame()) |
281 return nullptr; | 288 return nullptr; |
282 | 289 |
283 Element* html = document.documentElement(); | 290 Element* html = document.documentElement(); |
284 if (!html) | 291 if (!html) |
285 return nullptr; | 292 return nullptr; |
286 | 293 |
287 double start_time = MonotonicallyIncreasingTime(); | |
288 | |
289 WebPagePtr page = WebPage::New(); | 294 WebPagePtr page = WebPage::New(); |
290 | 295 |
291 // Traverse the DOM tree and extract the metadata. | 296 // Traverse the DOM tree and extract the metadata. |
292 if (!extractMetadata(*html, page->entities)) | 297 double start_time = MonotonicallyIncreasingTime(); |
298 ExtractionStatus status = extractMetadata(*html, page->entities); | |
299 double elapsed_time = MonotonicallyIncreasingTime() - start_time; | |
300 | |
301 DEFINE_STATIC_LOCAL(EnumerationHistogram, status_histogram, | |
302 ("CopylessPaste.ExtractionStatus", kCount)); | |
303 status_histogram.Count(status); | |
304 | |
305 if (status != kOK) { | |
306 DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, | |
307 ("CopylessPaste.ExtractionFailedUs", 1, 1000000, 50)); | |
[Review comment] Ilya Sherman (2017/04/12 23:25:55): Optional nit: Could you please write 1000 * 1000 i… [comment truncated in this capture]
[Reply] wychen (2017/04/13 00:47:54): Done.
| |
308 extractionHistogram.Count(1e6 * elapsed_time); | |
293 return nullptr; | 309 return nullptr; |
310 } | |
311 DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, | |
312 ("CopylessPaste.ExtractionUs", 1, 1000000, 50)); | |
313 extractionHistogram.Count(1e6 * elapsed_time); | |
314 | |
294 page->url = document.Url(); | 315 page->url = document.Url(); |
295 page->title = document.title(); | 316 page->title = document.title(); |
296 | |
297 double elapsed_time = MonotonicallyIncreasingTime() - start_time; | |
298 | |
299 DEFINE_STATIC_LOCAL(CustomCountHistogram, extractionHistogram, | |
300 ("CopylessPaste.ExtractionUs", 1, 1000000, 50)); | |
301 extractionHistogram.Count(static_cast<int>(1e6 * elapsed_time)); | |
302 return page; | 317 return page; |
303 } | 318 } |
304 | 319 |
305 } // namespace blink | 320 } // namespace blink |
OLD | NEW |