| Index: boilerpipe-core/src/main/de/l3s/boilerpipe/filters/heuristics/BlockProximityFusion.java
|
| diff --git a/boilerpipe-core/src/main/de/l3s/boilerpipe/filters/heuristics/BlockProximityFusion.java b/boilerpipe-core/src/main/de/l3s/boilerpipe/filters/heuristics/BlockProximityFusion.java
|
| index 510c47fc6e913aeab31aec5cfe61d2412e920988..8399b5b92f1d2ce09ed9310b5ad0af8115a69749 100644
|
| --- a/boilerpipe-core/src/main/de/l3s/boilerpipe/filters/heuristics/BlockProximityFusion.java
|
| +++ b/boilerpipe-core/src/main/de/l3s/boilerpipe/filters/heuristics/BlockProximityFusion.java
|
| @@ -17,18 +17,19 @@
|
| */
|
| package de.l3s.boilerpipe.filters.heuristics;
|
|
|
| -import java.util.Iterator;
|
| -import java.util.List;
|
| -
|
| import de.l3s.boilerpipe.BoilerpipeFilter;
|
| import de.l3s.boilerpipe.BoilerpipeProcessingException;
|
| import de.l3s.boilerpipe.document.TextBlock;
|
| import de.l3s.boilerpipe.document.TextDocument;
|
| +import de.l3s.boilerpipe.labels.DefaultLabels;
|
| +
|
| +import java.util.Iterator;
|
| +import java.util.List;
|
|
|
| /**
|
| * Fuses adjacent blocks if their distance (in blocks) does not exceed a certain limit.
|
| * This probably makes sense only in cases where an upstream filter already has removed some blocks.
|
| - *
|
| + *
|
| * @author Christian Kohlschütter
|
| */
|
| public final class BlockProximityFusion implements BoilerpipeFilter {
|
| @@ -52,7 +53,7 @@ public final class BlockProximityFusion implements BoilerpipeFilter {
|
| * Creates a new {@link BlockProximityFusion} instance.
|
| *
|
| * @param maxBlocksDistance The maximum distance in blocks.
|
| - * @param contentOnly
|
| + * @param contentOnly
|
| */
|
| public BlockProximityFusion(final int maxBlocksDistance,
|
| final boolean contentOnly, final boolean sameTagLevelOnly) {
|
| @@ -61,6 +62,7 @@ public final class BlockProximityFusion implements BoilerpipeFilter {
|
| this.sameTagLevelOnly = sameTagLevelOnly;
|
| }
|
|
|
| + @Override
|
| public boolean process(TextDocument doc)
|
| throws BoilerpipeProcessingException {
|
| List<TextBlock> textBlocks = doc.getTextBlocks();
|
| @@ -110,6 +112,9 @@ public final class BlockProximityFusion implements BoilerpipeFilter {
|
| if(ok && sameTagLevelOnly && prevBlock.getTagLevel() != block.getTagLevel()) {
|
| ok = false;
|
| }
|
| + if (prevBlock.hasLabel(DefaultLabels.TITLE) != block.hasLabel(DefaultLabels.TITLE)) {
|
| + ok = false;
|
| + }
|
| if (ok) {
|
| prevBlock.mergeNext(block);
|
| it.remove();
|
|
|