Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
| 6 | 6 |
| 7 import org.chromium.distiller.document.TextDocument; | 7 import org.chromium.distiller.document.TextDocument; |
| 8 import org.chromium.distiller.document.TextDocumentStatistics; | 8 import org.chromium.distiller.document.TextDocumentStatistics; |
| 9 import org.chromium.distiller.extractors.ArticleExtractor; | 9 import org.chromium.distiller.extractors.ArticleExtractor; |
| 10 import org.chromium.distiller.proto.DomDistillerProtos.StatisticsInfo; | 10 import org.chromium.distiller.proto.DomDistillerProtos.StatisticsInfo; |
| (...skipping 68 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 79 ensureTitleInitialized(); | 79 ensureTitleInitialized(); |
| 80 assert candidateTitles.size() > 0; | 80 assert candidateTitles.size() > 0; |
| 81 return candidateTitles.get(0); | 81 return candidateTitles.get(0); |
| 82 } | 82 } |
| 83 | 83 |
| 84 public String extractContent() { | 84 public String extractContent() { |
| 85 return extractContent(false); | 85 return extractContent(false); |
| 86 } | 86 } |
| 87 | 87 |
| 88 public String extractContent(boolean textOnly) { | 88 public String extractContent(boolean textOnly) { |
| | 89 |
| | 90 String structuredData = parser.getStructuredData(); |
| | wychen (2016/03/14 22:58:42): Might make sense to measure the time spent in this... |
| | 91 if (!structuredData.isEmpty()) { |
| | 92 return structuredData; |
| | 93 } |
| | 94 |
| 89 double now = DomUtil.getTime(); | 95 double now = DomUtil.getTime(); |
| 90 WebDocumentInfo documentInfo = createWebDocumentInfoFromPage(); | 96 WebDocumentInfo documentInfo = createWebDocumentInfoFromPage(); |
| 91 mTimingInfo.setDocumentConstructionTime(DomUtil.getTime() - now); | 97 mTimingInfo.setDocumentConstructionTime(DomUtil.getTime() - now); |
| 92 | 98 |
| 93 now = DomUtil.getTime(); | 99 now = DomUtil.getTime(); |
| 94 processDocument(documentInfo.document); | 100 processDocument(documentInfo.document); |
| 95 RelevantElements.process(documentInfo.document); | 101 RelevantElements.process(documentInfo.document); |
| 96 LeadImageFinder.process(documentInfo.document); | 102 LeadImageFinder.process(documentInfo.document); |
| 97 NestedElementRetainer.process(documentInfo.document); | 103 NestedElementRetainer.process(documentInfo.document); |
| 98 | 104 |
| (...skipping 109 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 208 * | 214 * |
| 209 * @param document the WebDocument representation of the page extracted from the DOM. | 215 * @param document the WebDocument representation of the page extracted from the DOM. |
| 210 */ | 216 */ |
| 211 private void processDocument(WebDocument document) { | 217 private void processDocument(WebDocument document) { |
| 212 TextDocument textDocument = document.createTextDocumentView(); | 218 TextDocument textDocument = document.createTextDocumentView(); |
| 213 ArticleExtractor.INSTANCE.process(textDocument, candidateTitles); | 219 ArticleExtractor.INSTANCE.process(textDocument, candidateTitles); |
| 214 mStatisticsInfo.setWordCount(TextDocumentStatistics.countWordsInContent( textDocument)); | 220 mStatisticsInfo.setWordCount(TextDocumentStatistics.countWordsInContent( textDocument)); |
| 215 textDocument.applyToModel(); | 221 textDocument.applyToModel(); |
| 216 } | 222 } |
| 217 } | 223 } |
| OLD | NEW |