Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(532)

Unified Diff: boilerpipe-core/src/main/de/l3s/boilerpipe/sax/MarkupTagAction.java

Issue 296113004: Start using computed style instead of default tag actions. (Closed) Base URL: https://code.google.com/p/dom-distiller/@master
Patch Set: Fixed nit. Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: boilerpipe-core/src/main/de/l3s/boilerpipe/sax/MarkupTagAction.java
diff --git a/boilerpipe-core/src/main/de/l3s/boilerpipe/sax/MarkupTagAction.java b/boilerpipe-core/src/main/de/l3s/boilerpipe/sax/MarkupTagAction.java
deleted file mode 100644
index 594aa546b50002b665be68fbdc494eaad5ee75c4..0000000000000000000000000000000000000000
--- a/boilerpipe-core/src/main/de/l3s/boilerpipe/sax/MarkupTagAction.java
+++ /dev/null
@@ -1,104 +0,0 @@
-package de.l3s.boilerpipe.sax;
-
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Set;
-import java.util.regex.Pattern;
-
-import com.dom_distiller.client.sax.Attributes;
-
-import de.l3s.boilerpipe.document.TextBlock;
-import de.l3s.boilerpipe.labels.DefaultLabels;
-import de.l3s.boilerpipe.labels.LabelAction;
-
-/**
- * Assigns labels for element CSS classes and ids to the corresponding
- * {@link TextBlock}. CSS classes are prefixed by
- * <code>{@link DefaultLabels#MARKUP_PREFIX}.</code>, and IDs are prefixed by
- * <code>{@link DefaultLabels#MARKUP_PREFIX}#</code>
- *
- * @author Christian Kohlschütter
- */
-public final class MarkupTagAction implements TagAction {
-
- private final boolean isBlockLevel;
- private LinkedList<List<String>> labelStack = new LinkedList<List<String>>();
-
- public MarkupTagAction(final boolean isBlockLevel) {
- this.isBlockLevel = isBlockLevel;
- }
-
- private static final Pattern PAT_NUM = Pattern.compile("[0-9]+");
-
- @Override
- public boolean start(BoilerpipeHTMLContentHandler instance,
- String localName, String qName, Attributes atts) {
- List<String> labels = new ArrayList<String>(5);
- labels.add(DefaultLabels.MARKUP_PREFIX + localName);
-
- String classVal = atts.getValue("class");
-
- if (classVal != null && classVal.length() > 0) {
- classVal = PAT_NUM.matcher(classVal).replaceAll("#");
- classVal = classVal.trim();
- String[] vals = classVal.split("[ ]+");
- labels.add(DefaultLabels.MARKUP_PREFIX + "."
- + classVal.replace(' ', '.'));
- if (vals.length > 1) {
- for (String s : vals) {
- labels.add(DefaultLabels.MARKUP_PREFIX + "." + s);
- }
- }
- }
-
- String id = atts.getValue("id");
- if (id != null && id.length() > 0) {
- id = PAT_NUM.matcher(id).replaceAll("#");
- labels.add(DefaultLabels.MARKUP_PREFIX + "#" + id);
- }
-
- Set<String> ancestors = getAncestorLabels();
- List<String> labelsWithAncestors = new ArrayList<String>(
- (ancestors.size() + 1) * labels.size());
-
- for (String l : labels) {
- for (String an : ancestors) {
- labelsWithAncestors.add(an);
- labelsWithAncestors.add(an + " " + l);
- }
- labelsWithAncestors.add(l);
- }
-
- instance.addLabelAction(new LabelAction(labelsWithAncestors
- .toArray(new String[labelsWithAncestors.size()])));
-
- labelStack.add(labels);
-
- return isBlockLevel;
- }
-
- @Override
- public boolean end(BoilerpipeHTMLContentHandler instance, String localName,
- String qName) {
-
- labelStack.removeLast();
- return isBlockLevel;
- }
-
- public boolean changesTagLevel() {
- return isBlockLevel;
- }
-
- private Set<String> getAncestorLabels() {
- Set<String> set = new HashSet<String>();
- for (List<String> labels : labelStack) {
- if (labels == null) {
- continue;
- }
- set.addAll(labels);
- }
- return set;
- }
-}

Powered by Google App Engine
This is Rietveld 408576698