Index: boilerpipe-core/src/main/de/l3s/boilerpipe/sax/DefaultTagActionMap.java |
diff --git a/boilerpipe-core/src/main/de/l3s/boilerpipe/sax/DefaultTagActionMap.java b/boilerpipe-core/src/main/de/l3s/boilerpipe/sax/DefaultTagActionMap.java |
index d4da0c79cb61a0a8a5fdbe0d624d491c1e2d9339..b90f0c55026ce95ebdf663f4757089ba2fe007ea 100644 |
--- a/boilerpipe-core/src/main/de/l3s/boilerpipe/sax/DefaultTagActionMap.java |
+++ b/boilerpipe-core/src/main/de/l3s/boilerpipe/sax/DefaultTagActionMap.java |
@@ -43,38 +43,11 @@ public class DefaultTagActionMap extends TagActionMap { |
setTagAction("EMBED", CommonTagActions.TA_IGNORABLE_ELEMENT); |
setTagAction("APPLET", CommonTagActions.TA_IGNORABLE_ELEMENT); |
setTagAction("LINK", CommonTagActions.TA_IGNORABLE_ELEMENT); |
+ setTagAction("NOSCRIPT", CommonTagActions.TA_IGNORABLE_ELEMENT); |
setTagAction("A", CommonTagActions.TA_ANCHOR_TEXT); |
setTagAction("BODY", CommonTagActions.TA_BODY); |
- setTagAction("STRIKE", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- setTagAction("U", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- setTagAction("B", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- setTagAction("I", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- setTagAction("EM", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- setTagAction("STRONG", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- setTagAction("SPAN", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- |
- |
- // New in 1.1 (especially to improve extraction quality from Wikipedia etc.) |
- setTagAction("SUP", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- |
- // New in 1.2 |
- setTagAction("CODE", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- setTagAction("TT", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- setTagAction("SUB", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- setTagAction("VAR", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- |
- |
- setTagAction("ABBR", CommonTagActions.TA_INLINE_WHITESPACE); |
nyquist
2014/05/28 23:51:33
These two are now treated as TA_INLINE_NO_WHITESPACE…
Yaron
2014/05/29 01:09:10
Seems correct
nyquist
2014/05/29 23:42:25
Also removed the now unused TagAction for this.
|
- setTagAction("ACRONYM", CommonTagActions.TA_INLINE_WHITESPACE); |
- |
- setTagAction("FONT", CommonTagActions.TA_INLINE_NO_WHITESPACE); // could also use TA_FONT |
- |
- // added in 1.1.1 |
- setTagAction("NOSCRIPT", CommonTagActions.TA_IGNORABLE_ELEMENT); |
- |
- // New in 1.3 |
setTagAction("LI", new CommonTagActions.BlockTagLabelAction( |
new LabelAction(DefaultLabels.LI))); |
setTagAction("H1", new CommonTagActions.BlockTagLabelAction( |
@@ -83,12 +56,5 @@ public class DefaultTagActionMap extends TagActionMap { |
new LabelAction(DefaultLabels.H2, DefaultLabels.HEADING))); |
setTagAction("H3", new CommonTagActions.BlockTagLabelAction( |
new LabelAction(DefaultLabels.H3, DefaultLabels.HEADING))); |
- |
- // New in DomDistiller |
- setTagAction("CITE", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- setTagAction("MARK", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- setTagAction("S", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- setTagAction("Q", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- setTagAction("TIME", CommonTagActions.TA_INLINE_NO_WHITESPACE); |
- } |
+ } |
} |