Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1595)

Unified Diff: boilerpipe-core/src/main/de/l3s/boilerpipe/sax/CommonTagActions.java

Issue 296113004: Start using computed style instead of default tag actions. (Closed) Base URL: https://code.google.com/p/dom-distiller/@master
Patch Set: Fixed nit. Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: boilerpipe-core/src/main/de/l3s/boilerpipe/sax/CommonTagActions.java
diff --git a/boilerpipe-core/src/main/de/l3s/boilerpipe/sax/CommonTagActions.java b/boilerpipe-core/src/main/de/l3s/boilerpipe/sax/CommonTagActions.java
index 123031e6293ccf61bd31e9cc56d9b6c11c04fd11..6a89637148c86a5657617f0ff40c1a99a7b7f8fc 100644
--- a/boilerpipe-core/src/main/de/l3s/boilerpipe/sax/CommonTagActions.java
+++ b/boilerpipe-core/src/main/de/l3s/boilerpipe/sax/CommonTagActions.java
@@ -32,8 +32,8 @@ import de.l3s.boilerpipe.labels.LabelAction;
*/
public abstract class CommonTagActions {
- private CommonTagActions() {
- }
+ private CommonTagActions() {
+ }
public static final class Chained implements TagAction {
@@ -46,19 +46,16 @@ public abstract class CommonTagActions {
}
public boolean start(BoilerpipeHTMLContentHandler instance,
- String localName, String qName, Attributes atts) {
- return t1.start(instance, localName, qName, atts)
- | t2.start(instance, localName, qName, atts);
+ Attributes atts) {
+ return t1.start(instance, atts) | t2.start(instance, atts);
}
- public boolean end(BoilerpipeHTMLContentHandler instance,
- String localName, String qName) {
- return t1.end(instance, localName, qName)
- | t2.end(instance, localName, qName);
+ public boolean end(BoilerpipeHTMLContentHandler instance) {
+ return t1.end(instance) | t2.end(instance);
}
public boolean changesTagLevel() {
- return t1.changesTagLevel() || t2.changesTagLevel();
+ return t1.changesTagLevel() || t2.changesTagLevel();
}
}
@@ -68,20 +65,18 @@ public abstract class CommonTagActions {
public static final TagAction TA_IGNORABLE_ELEMENT = new TagAction() {
public boolean start(final BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName,
- final Attributes atts) {
+ final Attributes atts) {
instance.inIgnorableElement++;
return true;
}
- public boolean end(final BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName) {
+ public boolean end(final BoilerpipeHTMLContentHandler instance) {
instance.inIgnorableElement--;
return true;
}
public boolean changesTagLevel() {
- return true;
+ return true;
}
};
@@ -95,15 +90,14 @@ public abstract class CommonTagActions {
public static final TagAction TA_ANCHOR_TEXT = new TagAction() {
public boolean start(BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName,
- final Attributes atts) {
+ final Attributes atts) {
if (instance.inAnchor++ > 0) {
// as nested A elements are not allowed per specification, we
// are probably reaching this branch due to a bug in the XML
// parser
- System.err.println("Warning: SAX input contains nested A elements -- You have probably hit a bug in your HTML parser (e.g., NekoHTML bug #2909310). Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow...");
-
- end(instance, localName, qName);
+ System.err.println("Warning: SAX input contains nested A elements -- You have probably hit a bug in your HTML parser (e.g., NekoHTML bug #2909310). Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow...");
+
+ end(instance);
}
if (instance.inIgnorableElement == 0) {
instance.addWhitespaceIfNecessary();
@@ -115,8 +109,7 @@ public abstract class CommonTagActions {
return false;
}
- public boolean end(BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName) {
+ public boolean end(BoilerpipeHTMLContentHandler instance) {
if (--instance.inAnchor == 0) {
if (instance.inIgnorableElement == 0) {
instance.addWhitespaceIfNecessary();
@@ -130,7 +123,7 @@ public abstract class CommonTagActions {
}
public boolean changesTagLevel() {
- return true;
+ return true;
}
};
@@ -139,98 +132,82 @@ public abstract class CommonTagActions {
*/
public static final TagAction TA_BODY = new TagAction() {
public boolean start(final BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName,
- final Attributes atts) {
+ final Attributes atts) {
instance.flushBlock();
instance.inBody++;
return false;
}
- public boolean end(final BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName) {
+ public boolean end(final BoilerpipeHTMLContentHandler instance) {
instance.flushBlock();
instance.inBody--;
return false;
}
public boolean changesTagLevel() {
- return true;
+ return true;
}
};
/**
- * Marks this tag a simple "inline" element, which generates whitespace, but no new block.
+ * Marks this tag as a simple "inline" element, which generates neither whitespace nor a new block.
*/
- public static final TagAction TA_INLINE_WHITESPACE = new TagAction() {
+ public static final TagAction TA_INLINE_NO_WHITESPACE = new TagAction() {
public boolean start(BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName,
- final Attributes atts) {
- instance.addWhitespaceIfNecessary();
+ final Attributes atts) {
return false;
}
- public boolean end(BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName) {
- instance.addWhitespaceIfNecessary();
+ public boolean end(BoilerpipeHTMLContentHandler instance) {
return false;
}
-
+
public boolean changesTagLevel() {
- return false;
+ return false;
}
};
-
- /**
- * @deprecated Use {@link #TA_INLINE_WHITESPACE} instead
- */
- @Deprecated
- public static final TagAction TA_INLINE = TA_INLINE_WHITESPACE;
-
+ private static final Pattern PAT_FONT_SIZE = Pattern
+ .compile("([\\+\\-]?)([0-9])");
+
/**
- * Marks this tag a simple "inline" element, which neither generates whitespace, nor a new block.
+ * Explicitly marks this tag as a simple "block-level" element, which always generates whitespace.
*/
- public static final TagAction TA_INLINE_NO_WHITESPACE = new TagAction() {
+ public static final TagAction TA_BLOCK_LEVEL = new TagAction() {
public boolean start(BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName,
- final Attributes atts) {
- return false;
+ final Attributes atts) {
+ return true;
}
- public boolean end(BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName) {
- return false;
+ public boolean end(BoilerpipeHTMLContentHandler instance) {
+ return true;
}
public boolean changesTagLevel() {
- return false;
+ return true;
}
};
- private static final Pattern PAT_FONT_SIZE = Pattern
- .compile("([\\+\\-]?)([0-9])");
-
+
/**
- * Explicitly marks this tag a simple "block-level" element, which always generates whitespace
+ * Explicitly marks this tag as an inline-block element, which does not generate whitespace.
*/
- public static final TagAction TA_BLOCK_LEVEL = new TagAction() {
+ public static final TagAction TA_INLINE_BLOCK_LEVEL = new TagAction() {
public boolean start(BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName,
- final Attributes atts) {
- return true;
+ final Attributes atts) {
+ return false;
}
- public boolean end(BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName) {
- return true;
+ public boolean end(BoilerpipeHTMLContentHandler instance) {
+ return false;
}
-
+
public boolean changesTagLevel() {
- return true;
+ return true;
}
- };
-
+ };
+
/**
* Special TagAction for the <code>&lt;FONT&gt;</code> tag, which keeps track of the
* absolute and relative font size.
@@ -238,8 +215,7 @@ public abstract class CommonTagActions {
public static final TagAction TA_FONT = new TagAction() {
public boolean start(final BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName,
- final Attributes atts) {
+ final Attributes atts) {
String sizeAttr = atts.getValue("size");
if (sizeAttr != null) {
@@ -282,14 +258,13 @@ public abstract class CommonTagActions {
return false;
}
- public boolean end(final BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName) {
+ public boolean end(final BoilerpipeHTMLContentHandler instance) {
instance.fontSizeStack.removeFirst();
return false;
}
public boolean changesTagLevel() {
- return false;
+ return false;
}
};
@@ -306,21 +281,19 @@ public abstract class CommonTagActions {
}
public boolean start(BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName,
- final Attributes atts) {
+ final Attributes atts) {
instance.addWhitespaceIfNecessary();
instance.addLabelAction(action);
return false;
}
- public boolean end(BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName) {
+ public boolean end(BoilerpipeHTMLContentHandler instance) {
instance.addWhitespaceIfNecessary();
return false;
}
public boolean changesTagLevel() {
- return false;
+ return false;
}
}
@@ -337,19 +310,17 @@ public abstract class CommonTagActions {
}
public boolean start(BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName,
- final Attributes atts) {
+ final Attributes atts) {
instance.addLabelAction(action);
return true;
}
- public boolean end(BoilerpipeHTMLContentHandler instance,
- final String localName, final String qName) {
+ public boolean end(BoilerpipeHTMLContentHandler instance) {
return true;
}
public boolean changesTagLevel() {
- return true;
+ return true;
}
}
}

Powered by Google App Engine
This is Rietveld 408576698