Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1508)

Side by Side Diff: java/org/chromium/distiller/filters/english/TerminatingBlocksFinder.java

Issue 2721133002: Skip some social sharing elements (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | java/org/chromium/distiller/webdocument/DomConverter.java » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 /** 5 /**
6 * boilerpipe 6 * boilerpipe
7 * 7 *
8 * Copyright (c) 2009 Christian Kohlschütter 8 * Copyright (c) 2009 Christian Kohlschütter
9 * 9 *
10 * The author licenses this file to You under the Apache License, Version 2.0 10 * The author licenses this file to You under the Apache License, Version 2.0
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
61 + ")", 61 + ")",
62 "i"); 62 "i");
63 63
64 public static boolean isTerminatingText(String longText) { 64 public static boolean isTerminatingText(String longText) {
65 return REG_TERMINATING.test(longText); 65 return REG_TERMINATING.test(longText);
66 } 66 }
67 67
68 public static boolean isTerminating(TextBlock tb) { 68 public static boolean isTerminating(TextBlock tb) {
69 if (tb.getNumWords() > 14) return false; 69 if (tb.getNumWords() > 14) return false;
70 String text = StringUtil.jsTrim(tb.getText()); 70 String text = StringUtil.jsTrim(tb.getText());
71
71 if (text.length() >= 8) { 72 if (text.length() >= 8) {
72 return isTerminatingText(text); 73 return isTerminatingText(text);
73 } else if (tb.getLinkDensity() == 1.0) { 74 } else if (tb.getLinkDensity() == 1.0) {
74 return text.equals("Comment"); 75 return text.equals("Comment");
76 } else if (text.equals("Shares")) {
77 // Skip social and sharing elements.
78 // See crbug.com/692553
79 return true;
75 } 80 }
76 return false; 81 return false;
77 } 82 }
78 83
79 @Override 84 @Override
80 public boolean process(TextDocument doc) { 85 public boolean process(TextDocument doc) {
81 boolean changes = false; 86 boolean changes = false;
82 87
83 for (TextBlock tb : doc.getTextBlocks()) { 88 for (TextBlock tb : doc.getTextBlocks()) {
84 if (isTerminating(tb)) { 89 if (isTerminating(tb)) {
85 tb.addLabel(DefaultLabels.STRICTLY_NOT_CONTENT); 90 tb.addLabel(DefaultLabels.STRICTLY_NOT_CONTENT);
86 changes = true; 91 changes = true;
87 } 92 }
88 } 93 }
89 94
90 return changes; 95 return changes;
91 } 96 }
92 } 97 }
OLDNEW
« no previous file with comments | « no previous file | java/org/chromium/distiller/webdocument/DomConverter.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698