Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(533)

Side by Side Diff: java/org/chromium/distiller/webdocument/ElementAction.java

Issue 2596283004: Use stricter comment-detecting heuristics (Closed)
Patch Set: Created 3 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | javatests/org/chromium/distiller/webdocument/ElementActionTest.java » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller.webdocument; 5 package org.chromium.distiller.webdocument;
6 6
7 import org.chromium.distiller.DomUtil; 7 import org.chromium.distiller.DomUtil;
8 import org.chromium.distiller.labels.DefaultLabels; 8 import org.chromium.distiller.labels.DefaultLabels;
9 9
10 import com.google.gwt.core.client.JavaScriptObject; 10 import com.google.gwt.core.client.JavaScriptObject;
11 import com.google.gwt.core.client.JsArrayString; 11 import com.google.gwt.core.client.JsArrayString;
12 import com.google.gwt.dom.client.Element; 12 import com.google.gwt.dom.client.Element;
13 import com.google.gwt.dom.client.Style; 13 import com.google.gwt.dom.client.Style;
14 import com.google.gwt.regexp.shared.RegExp; 14 import com.google.gwt.regexp.shared.RegExp;
15 15
16 public class ElementAction { 16 public class ElementAction {
17 public boolean changesTagLevel = false; 17 public boolean changesTagLevel = false;
18 public boolean flush = false; 18 public boolean flush = false;
19 public boolean isAnchor = false; 19 public boolean isAnchor = false;
20 public JsArrayString labels = JavaScriptObject.createArray().<JsArrayString> cast(); 20 public JsArrayString labels = JavaScriptObject.createArray().<JsArrayString> cast();
21 21
22 private static final RegExp REG_COMMENT = RegExp.compile("\\bcomments?\\b"); 22 private static final RegExp REG_COMMENT = RegExp.compile("\\bcomments?\\b");
23 private static final int MAX_CLASS_COUNT = 5; 23 private static final int MAX_CLASS_COUNT = 2;
mdjones 2017/01/04 14:29:21 nit: the way this is used seems to be as a minimum
wychen 2017/01/04 14:37:51 What do you mean? This constant is the largest all
mdjones 2017/01/04 15:14:33 Ah, I was looking at it from: "if class count does
24 24
25 public static ElementAction getForElement(Element element) { 25 public static ElementAction getForElement(Element element) {
26 Style style = DomUtil.getComputedStyle(element); 26 Style style = DomUtil.getComputedStyle(element);
27 ElementAction action = new ElementAction(); 27 ElementAction action = new ElementAction();
28 String tagName = element.getTagName(); 28 String tagName = element.getTagName();
29 switch (style.getDisplay()) { 29 switch (style.getDisplay()) {
30 case "inline": 30 case "inline":
31 break; 31 break;
32 case "inline-block": 32 case "inline-block":
33 case "inline-flex": 33 case "inline-flex":
(...skipping 21 matching lines...) Expand all
55 // table-column-group 55 // table-column-group
56 // table-cell 56 // table-cell
57 // table-caption 57 // table-caption
58 // flex 58 // flex
59 default: 59 default:
60 action.flush = true; 60 action.flush = true;
61 action.changesTagLevel = true; 61 action.changesTagLevel = true;
62 break; 62 break;
63 } 63 }
64 64
65 if (!"HTML".equals(tagName) && !"BODY".equals(tagName)) { 65 if (!"HTML".equals(tagName) && !"BODY".equals(tagName) && !"ARTICLE".equals(tagName)) {
66 String className = element.getAttribute("class"); 66 String className = element.getAttribute("class");
67 int classCount = DomUtil.getClassList(element).length(); 67 int classCount = DomUtil.getClassList(element).length();
68 String id = element.getAttribute("id"); 68 String id = element.getAttribute("id");
69 if ((REG_COMMENT.test(className) || REG_COMMENT.test(id)) && 69 if ((REG_COMMENT.test(className) || REG_COMMENT.test(id)) &&
70 classCount <= MAX_CLASS_COUNT) { 70 classCount <= MAX_CLASS_COUNT) {
71 action.labels.push(DefaultLabels.STRICTLY_NOT_CONTENT); 71 action.labels.push(DefaultLabels.STRICTLY_NOT_CONTENT);
72 } 72 }
73 73
74 switch (tagName) { 74 switch (tagName) {
75 case "ASIDE": 75 case "ASIDE":
(...skipping 28 matching lines...) Expand all
104 action.isAnchor = true; 104 action.isAnchor = true;
105 } 105 }
106 break; 106 break;
107 } 107 }
108 } 108 }
109 return action; 109 return action;
110 } 110 }
111 111
112 private ElementAction() {} 112 private ElementAction() {}
113 } 113 }
OLD | NEW
« no previous file with comments | « no previous file | javatests/org/chromium/distiller/webdocument/ElementActionTest.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698