Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(162)

Unified Diff: src/de/l3s/boilerpipe/document/TextDocument.java

Issue 499623002: Instrument DomDistiller with timing information. (Closed) Base URL: https://code.google.com/p/dom-distiller/@master
Patch Set: Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/com/dom_distiller/client/DomUtil.java ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/de/l3s/boilerpipe/document/TextDocument.java
diff --git a/src/de/l3s/boilerpipe/document/TextDocument.java b/src/de/l3s/boilerpipe/document/TextDocument.java
index 4c31308c8b463e053683f8e6eeced82faeaf7ac3..ab74aaf4563727dae369b0655d2b70109c02e902 100644
--- a/src/de/l3s/boilerpipe/document/TextDocument.java
+++ b/src/de/l3s/boilerpipe/document/TextDocument.java
@@ -20,17 +20,23 @@
*/
package de.l3s.boilerpipe.document;
+import com.google.gwt.dom.client.Node;
+
import java.util.LinkedList;
import java.util.List;
+import java.util.Set;
/**
- * A text document, consisting of one or more {@link TextBlock}s.
+ * A text document, consisting of one or more {@link TextBlock}s, and features
+ * of the original page (e.g. candidate titles, hidden elements, etc).
*
* @author Christian Kohlschütter
*/
public class TextDocument implements Cloneable {
final List<TextBlock> textBlocks;
List<String> candidateTitles;
+ private Set<Node> dataTables;
+ private Set<Node> hiddenElements;
/**
* Creates a new {@link TextDocument} with given {@link TextBlock}s, and no
@@ -81,9 +87,10 @@ public class TextDocument implements Cloneable {
* Sets the list of candidate titles.
* @param candidateTitles
*/
- public void setCanddiateTitles(List<String> candidateTitles) {
+ public void setCandidateTitles(List<String> candidateTitles) {
this.candidateTitles = new LinkedList<String>(candidateTitles);
}
+
/**
* Returns the {@link TextDocument}'s content.
*
@@ -131,4 +138,39 @@ public class TextDocument implements Cloneable {
}
return sb.toString();
}
+
+ /**
+ * Returns the set of nodes from the original Document which were classified as data tables
+ * (i.e. are treated as an atomic block of text).
+ * @return the set of data tables
+ */
+ public Set<Node> getDataTables() {
+ return dataTables;
+ }
+
+ /**
+ * Sets the data tables identified while processing the document.
+ * @param dataTables the set of data tables
+ */
+ public void setDataTables(Set<Node> dataTables) {
+ this.dataTables = dataTables;
+ }
+
+ /**
+ * Returns the set of nodes from the original Document which weren't actually visible. These
+ * are typically omitted from boilerpipe text processing but are tracked for post-processing.
+ * @return the set of hidden elements
+ */
+ public Set<Node> getHiddenElements() {
+ return hiddenElements;
+ }
+
+ /**
+ * Sets the hidden elements identified while processing the document.
+ * @param hiddenElements the set of hidden elements
+ */
+ public void setHiddenElements(Set<Node> hiddenElements) {
+ this.hiddenElements = hiddenElements;
+ }
+
}
« no previous file with comments | « src/com/dom_distiller/client/DomUtil.java ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698