 Chromium Code Reviews
 Chromium Code Reviews Issue 1705123002:
  Add support for Schema.org/Recipe 
  Base URL: https://github.com/chromium/dom-distiller.git@master
    
  
    Issue 1705123002:
  Add support for Schema.org/Recipe 
  Base URL: https://github.com/chromium/dom-distiller.git@master

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 package org.chromium.distiller; | 5 package org.chromium.distiller; | 
| 6 | 6 | 
| 7 import org.chromium.distiller.document.TextDocument; | 7 import org.chromium.distiller.document.TextDocument; | 
| 8 import org.chromium.distiller.document.TextDocumentStatistics; | 8 import org.chromium.distiller.document.TextDocumentStatistics; | 
| 9 import org.chromium.distiller.extractors.ArticleExtractor; | 9 import org.chromium.distiller.extractors.ArticleExtractor; | 
| 10 import org.chromium.distiller.proto.DomDistillerProtos.StatisticsInfo; | 10 import org.chromium.distiller.proto.DomDistillerProtos.StatisticsInfo; | 
| (...skipping 68 matching lines...) | (...skipping 68 matching lines...) |
| 79 ensureTitleInitialized(); | 79 ensureTitleInitialized(); | 
| 80 assert candidateTitles.size() > 0; | 80 assert candidateTitles.size() > 0; | 
| 81 return candidateTitles.get(0); | 81 return candidateTitles.get(0); | 
| 82 } | 82 } | 
| 83 | 83 | 
| 84 public String extractContent() { | 84 public String extractContent() { | 
| 85 return extractContent(false); | 85 return extractContent(false); | 
| 86 } | 86 } | 
| 87 | 87 | 
| 88 public String extractContent(boolean textOnly) { | 88 public String extractContent(boolean textOnly) { | 
| 89 | |
| 
wychen
2016/07/24 23:06:34
nit: extra line
 | |
| 89 double now = DomUtil.getTime(); | 90 double now = DomUtil.getTime(); | 
| 91 String structuredData = parser.getStructuredData(); | |
| 92 LogUtil.addTimingInfo(now, mTimingInfo, "parser.getStructuredData()"); | |
| 
wychen
2016/07/24 23:06:33
Maybe just "getStructuredData" for consistency.
 | |
| 93 if (!structuredData.isEmpty()) { | |
| 94 return structuredData; | |
| 95 } | |
| 96 now = DomUtil.getTime(); | |
| 90 WebDocumentInfo documentInfo = createWebDocumentInfoFromPage(); | 97 WebDocumentInfo documentInfo = createWebDocumentInfoFromPage(); | 
| 91 mTimingInfo.setDocumentConstructionTime(DomUtil.getTime() - now); | 98 mTimingInfo.setDocumentConstructionTime(DomUtil.getTime() - now); | 
| 92 | 99 | 
| 93 now = DomUtil.getTime(); | 100 now = DomUtil.getTime(); | 
| 94 processDocument(documentInfo.document); | 101 processDocument(documentInfo.document); | 
| 95 RelevantElements.process(documentInfo.document); | 102 RelevantElements.process(documentInfo.document); | 
| 96 LeadImageFinder.process(documentInfo.document); | 103 LeadImageFinder.process(documentInfo.document); | 
| 97 NestedElementRetainer.process(documentInfo.document); | 104 NestedElementRetainer.process(documentInfo.document); | 
| 98 | 105 | 
| 99 List<WebImage> images = documentInfo.document.getContentImages(); | 106 List<WebImage> images = documentInfo.document.getContentImages(); | 
| (...skipping 86 matching lines...) | (...skipping 86 matching lines...) |
| 186 * | 193 * | 
| 187 * @param document the WebDocument representation of the page extracted from the DOM. | 194 * @param document the WebDocument representation of the page extracted from the DOM. | 
| 188 */ | 195 */ | 
| 189 private void processDocument(WebDocument document) { | 196 private void processDocument(WebDocument document) { | 
| 190 TextDocument textDocument = document.createTextDocumentView(); | 197 TextDocument textDocument = document.createTextDocumentView(); | 
| 191 ArticleExtractor.INSTANCE.process(textDocument, candidateTitles); | 198 ArticleExtractor.INSTANCE.process(textDocument, candidateTitles); | 
| 192 mStatisticsInfo.setWordCount(TextDocumentStatistics.countWordsInContent( textDocument)); | 199 mStatisticsInfo.setWordCount(TextDocumentStatistics.countWordsInContent( textDocument)); | 
| 193 textDocument.applyToModel(); | 200 textDocument.applyToModel(); | 
| 194 } | 201 } | 
| 195 } | 202 } | 
| OLD | NEW |