| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller.webdocument; | 5 package org.chromium.distiller.webdocument; |
| 6 | 6 |
| 7 import org.chromium.distiller.document.TextDocument; | 7 import org.chromium.distiller.document.TextDocument; |
| 8 import org.chromium.distiller.document.TextBlock; | 8 import org.chromium.distiller.document.TextBlock; |
| 9 | 9 |
| 10 import java.util.ArrayList; | 10 import java.util.ArrayList; |
| (...skipping 23 matching lines...) Expand all Loading... |
| 34 } | 34 } |
| 35 | 35 |
| 36 public void addEmbed(WebElement embed) { | 36 public void addEmbed(WebElement embed) { |
| 37 elements.add(embed); | 37 elements.add(embed); |
| 38 } | 38 } |
| 39 | 39 |
| 40 public List<WebElement> getElements() { | 40 public List<WebElement> getElements() { |
| 41 return elements; | 41 return elements; |
| 42 } | 42 } |
| 43 | 43 |
| 44 public List<WebImage> getContentImages() { | 44 public List<String> getImageUrls() { |
| 45 List<WebImage> images = new ArrayList<>(); | 45 List<String> images = new ArrayList<>(); |
| 46 for (WebElement e : elements) { | 46 for (WebElement e : elements) { |
| 47 if (e instanceof WebImage && e.getIsContent()) { | 47 if (!e.getIsContent()) continue; |
| 48 images.add((WebImage) e); | 48 if (e instanceof WebImage) { |
| 49 images.addAll(((WebImage) e).getUrlList()); |
| 49 } | 50 } |
| 50 } | 51 } |
| 51 return images; | 52 return images; |
| 52 } | 53 } |
| 53 | 54 |
| 54 /** | 55 /** |
| 55 * This method generates a web document to be processed by boilerpipe. Text groups have been | 56 * This method generates a web document to be processed by boilerpipe. Text groups have been |
| 56 * introduced to help retain element order when adding images and embeds. | 57 * introduced to help retain element order when adding images and embeds. |
| 57 * @return TextDocument object built from this web document. | 58 * @return TextDocument object built from this web document. |
| 58 */ | 59 */ |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 101 if (!e.getIsContent()) continue; | 102 if (!e.getIsContent()) continue; |
| 102 output.append(e.generateOutput(textOnly)); | 103 output.append(e.generateOutput(textOnly)); |
| 103 if (textOnly) { | 104 if (textOnly) { |
| 104 // Put some space between paragraphs in text-only mode. | 105 // Put some space between paragraphs in text-only mode. |
| 105 output.append("\n"); | 106 output.append("\n"); |
| 106 } | 107 } |
| 107 } | 108 } |
| 108 return output.toString(); | 109 return output.toString(); |
| 109 } | 110 } |
| 110 } | 111 } |
| OLD | NEW |