OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 /** | 5 /** |
6 * boilerpipe | 6 * boilerpipe |
7 * | 7 * |
8 * Copyright (c) 2009 Christian Kohlschütter | 8 * Copyright (c) 2009 Christian Kohlschütter |
9 * | 9 * |
10 * The author licenses this file to You under the Apache License, Version 2.0 | 10 * The author licenses this file to You under the Apache License, Version 2.0 |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
61 + ")", | 61 + ")", |
62 "i"); | 62 "i"); |
63 | 63 |
64 public static boolean isTerminatingText(String longText) { | 64 public static boolean isTerminatingText(String longText) { |
65 return REG_TERMINATING.test(longText); | 65 return REG_TERMINATING.test(longText); |
66 } | 66 } |
67 | 67 |
68 public static boolean isTerminating(TextBlock tb) { | 68 public static boolean isTerminating(TextBlock tb) { |
69 if (tb.getNumWords() > 14) return false; | 69 if (tb.getNumWords() > 14) return false; |
70 String text = StringUtil.jsTrim(tb.getText()); | 70 String text = StringUtil.jsTrim(tb.getText()); |
| 71 |
71 if (text.length() >= 8) { | 72 if (text.length() >= 8) { |
72 return isTerminatingText(text); | 73 return isTerminatingText(text); |
73 } else if (tb.getLinkDensity() == 1.0) { | 74 } else if (tb.getLinkDensity() == 1.0) { |
74 return text.equals("Comment"); | 75 return text.equals("Comment"); |
| 76 } else if (text.equals("Shares")) { |
| 77 // Skip social and sharing elements. |
| 78 // See crbug.com/692553 |
| 79 return true; |
75 } | 80 } |
76 return false; | 81 return false; |
77 } | 82 } |
78 | 83 |
79 @Override | 84 @Override |
80 public boolean process(TextDocument doc) { | 85 public boolean process(TextDocument doc) { |
81 boolean changes = false; | 86 boolean changes = false; |
82 | 87 |
83 for (TextBlock tb : doc.getTextBlocks()) { | 88 for (TextBlock tb : doc.getTextBlocks()) { |
84 if (isTerminating(tb)) { | 89 if (isTerminating(tb)) { |
85 tb.addLabel(DefaultLabels.STRICTLY_NOT_CONTENT); | 90 tb.addLabel(DefaultLabels.STRICTLY_NOT_CONTENT); |
86 changes = true; | 91 changes = true; |
87 } | 92 } |
88 } | 93 } |
89 | 94 |
90 return changes; | 95 return changes; |
91 } | 96 } |
92 } | 97 } |
OLD | NEW |