| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 /** | 5 /** |
| 6 * boilerpipe | 6 * boilerpipe |
| 7 * | 7 * |
| 8 * Copyright (c) 2009 Christian Kohlschütter | 8 * Copyright (c) 2009 Christian Kohlschütter |
| 9 * | 9 * |
| 10 * The author licenses this file to You under the Apache License, Version 2.0 | 10 * The author licenses this file to You under the Apache License, Version 2.0 |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 61 + ")", | 61 + ")", |
| 62 "i"); | 62 "i"); |
| 63 | 63 |
| 64 public static boolean isTerminatingText(String longText) { | 64 public static boolean isTerminatingText(String longText) { |
| 65 return REG_TERMINATING.test(longText); | 65 return REG_TERMINATING.test(longText); |
| 66 } | 66 } |
| 67 | 67 |
| 68 public static boolean isTerminating(TextBlock tb) { | 68 public static boolean isTerminating(TextBlock tb) { |
| 69 if (tb.getNumWords() > 14) return false; | 69 if (tb.getNumWords() > 14) return false; |
| 70 String text = StringUtil.jsTrim(tb.getText()); | 70 String text = StringUtil.jsTrim(tb.getText()); |
| 71 |
| 71 if (text.length() >= 8) { | 72 if (text.length() >= 8) { |
| 72 return isTerminatingText(text); | 73 return isTerminatingText(text); |
| 73 } else if (tb.getLinkDensity() == 1.0) { | 74 } else if (tb.getLinkDensity() == 1.0) { |
| 74 return text.equals("Comment"); | 75 return text.equals("Comment"); |
| 76 } else if (text.equals("Shares")) { |
| 77 // Skip social and sharing elements. |
| 78 // See crbug.com/692553 |
| 79 return true; |
| 75 } | 80 } |
| 76 return false; | 81 return false; |
| 77 } | 82 } |
| 78 | 83 |
| 79 @Override | 84 @Override |
| 80 public boolean process(TextDocument doc) { | 85 public boolean process(TextDocument doc) { |
| 81 boolean changes = false; | 86 boolean changes = false; |
| 82 | 87 |
| 83 for (TextBlock tb : doc.getTextBlocks()) { | 88 for (TextBlock tb : doc.getTextBlocks()) { |
| 84 if (isTerminating(tb)) { | 89 if (isTerminating(tb)) { |
| 85 tb.addLabel(DefaultLabels.STRICTLY_NOT_CONTENT); | 90 tb.addLabel(DefaultLabels.STRICTLY_NOT_CONTENT); |
| 86 changes = true; | 91 changes = true; |
| 87 } | 92 } |
| 88 } | 93 } |
| 89 | 94 |
| 90 return changes; | 95 return changes; |
| 91 } | 96 } |
| 92 } | 97 } |
| OLD | NEW |