Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(31)

Side by Side Diff: javatests/org/chromium/distiller/ContentExtractorTest.java

Issue 1507373003: Clean up attributes of image elements (Closed) Base URL: git@github.com:chromium/dom-distiller.git@master
Patch Set: Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.dom.client.Document; 7 import com.google.gwt.dom.client.Document;
8 import com.google.gwt.dom.client.Element; 8 import com.google.gwt.dom.client.Element;
9 9
10 public class ContentExtractorTest extends DomDistillerJsTestCase { 10 public class ContentExtractorTest extends DomDistillerJsTestCase {
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
79 assertTrue(parser != null); 79 assertTrue(parser != null);
80 assertEquals(MARKUP_PARSER_TITLE, parser.getTitle()); 80 assertEquals(MARKUP_PARSER_TITLE, parser.getTitle());
81 81
82 Document.get().setTitle(TITLE_TEXT); 82 Document.get().setTitle(TITLE_TEXT);
83 83
84 ContentExtractor extractor = new ContentExtractor(mRoot); 84 ContentExtractor extractor = new ContentExtractor(mRoot);
85 assertEquals("OpenGraph title should be picked over document.title", 85 assertEquals("OpenGraph title should be picked over document.title",
86 MARKUP_PARSER_TITLE, extractor.extractTitle()); 86 MARKUP_PARSER_TITLE, extractor.extractTitle());
87 } 87 }
88 88
89 public void testImageWithSrcset() { 89 public void testImage() {
90 // Test the absolute and different kinds of relative URLs for image sources, 90 // Test the absolute and different kinds of relative URLs for image sources,
91 // and also add an extra comma (,) as malformed srcset syntax for robustness. 91 // and also add an extra comma (,) as malformed srcset syntax for robustness.
92 // Also test images in WebImage, WebText, and WebTable.
wychen 2015/12/09 05:17:40 TODO(wychen): how do I get images inside a WebText
mdjones 2015/12/09 17:40:16 This is an unsolved problem. Since tables are curr
92 final String html = 93 final String html =
93 "<h1>" + CONTENT_TEXT + "</h1>" + 94 "<h1>" + CONTENT_TEXT + "</h1>" +
94 "<img src=\"image\" srcset=\"image200 200w, //example.org/image400 400w\">" + 95 "<img id=\"a\" style=\"a\" align=\"left\" src=\"image\" srcset=\"image200 200w, //example.org/image400 400w\">" +
96 "<img id=\"a\" style=\"a\" align=\"left\" src=\"image2\">" +
95 "<table role=\"grid\"><tbody><tr><td>" + 97 "<table role=\"grid\"><tbody><tr><td>" +
96 "<img src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" + 98 "<img id=\"a\" style=\"a\" align=\"left\" src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" +
99 "<img id=\"a\" style=\"a\" align=\"left\" src=\"/image2\">" +
97 "</td></tr></tbody></table>" + 100 "</td></tr></tbody></table>" +
98 "<p>" + CONTENT_TEXT + "</p>"; 101 "<p>" + CONTENT_TEXT + "</p>";
99 102
100 final String expected = 103 final String expected =
101 "<h1>" + CONTENT_TEXT + "</h1>" + 104 "<h1>" + CONTENT_TEXT + "</h1>" +
102 "<img src=\"http://example.com/path/image\" " + 105 "<img src=\"http://example.com/path/image\" " +
103 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\">" + 106 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\">" +
107 "<img src=\"http://example.com/path/image2\">" +
104 "<table role=\"grid\"><tbody><tr><td>" + 108 "<table role=\"grid\"><tbody><tr><td>" +
105 "<img src=\"http://example.com/image\" " + 109 "<img src=\"http://example.com/image\" " +
106 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" + 110 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" +
111 "<img src=\"http://example.com/image2\">" +
107 "</td></tr></tbody></table>" + 112 "</td></tr></tbody></table>" +
108 "<p>" + CONTENT_TEXT + "</p>"; 113 "<p>" + CONTENT_TEXT + "</p>";
109 114
110 mHead.setInnerHTML("<base href=\"http://example.com/path/\">"); 115 mHead.setInnerHTML("<base href=\"http://example.com/path/\">");
111 mBody.setInnerHTML(html); 116 mBody.setInnerHTML(html);
112 117
113 ContentExtractor extractor = new ContentExtractor(mRoot); 118 ContentExtractor extractor = new ContentExtractor(mRoot);
114 String extractedContent = extractor.extractContent(); 119 String extractedContent = extractor.extractContent();
115 120
116 assertEquals(expected, 121 assertEquals(expected,
(...skipping 27 matching lines...) Expand all
144 } 149 }
145 150
146 public void testRemoveStyleAttributes() { 151 public void testRemoveStyleAttributes() {
147 String html = 152 String html =
148 "<h1 style=\"font-weight: folder\">" + 153 "<h1 style=\"font-weight: folder\">" +
149 CONTENT_TEXT + 154 CONTENT_TEXT +
150 "</h1>" + 155 "</h1>" +
151 "<p style=\"\">" + 156 "<p style=\"\">" +
152 CONTENT_TEXT + 157 CONTENT_TEXT +
153 "</p>" + 158 "</p>" +
159 "<img style=\"align: left\" src=\"/test.png\">" +
154 "<table style=\"position: absolute\">" + 160 "<table style=\"position: absolute\">" +
155 "<tbody style=\"font-size: 2\">" + 161 "<tbody style=\"font-size: 2\">" +
156 "<tr style=\"z-index: 0\">" + 162 "<tr style=\"z-index: 0\">" +
157 "<th style=\"top: 0px\">" + CONTENT_TEXT + "</th>" + 163 "<th style=\"top: 0px\">" + CONTENT_TEXT +
164 "<img style=\"align: left\" src=\"/test.png\">" +
165 "</th>" +
158 "<th style=\"width: 20px\">" + CONTENT_TEXT + "</th>" + 166 "<th style=\"width: 20px\">" + CONTENT_TEXT + "</th>" +
159 "</tr><tr style=\"left: 0\">" + 167 "</tr><tr style=\"left: 0\">" +
160 "<td style=\"display: block\">" + CONTENT_TEXT + "</td>" + 168 "<td style=\"display: block\">" + CONTENT_TEXT + "</td>" +
161 "<td style=\"color: #123\">" + CONTENT_TEXT + "</td>" + 169 "<td style=\"color: #123\">" + CONTENT_TEXT + "</td>" +
162 "</tr>" + 170 "</tr>" +
163 "</tbody>" + 171 "</tbody>" +
164 "</table>"; 172 "</table>";
165 173
166 final String expected = 174 final String expected =
167 "<h1>" + 175 "<h1>" +
168 CONTENT_TEXT + 176 CONTENT_TEXT +
169 "</h1>" + 177 "</h1>" +
170 "<p>" + 178 "<p>" +
171 CONTENT_TEXT + 179 CONTENT_TEXT +
172 "</p>" + 180 "</p>" +
181 "<img src=\"http://example.com/test.png\">" +
173 "<table>" + 182 "<table>" +
174 "<tbody>" + 183 "<tbody>" +
175 "<tr>" + 184 "<tr>" +
176 "<th>" + CONTENT_TEXT + "</th>" + 185 "<th>" + CONTENT_TEXT +
186 "<img src=\"http://example.com/test.png\">" +
187 "</th>" +
177 "<th>" + CONTENT_TEXT + "</th>" + 188 "<th>" + CONTENT_TEXT + "</th>" +
178 "</tr><tr>" + 189 "</tr><tr>" +
179 "<td>" + CONTENT_TEXT + "</td>" + 190 "<td>" + CONTENT_TEXT + "</td>" +
180 "<td>" + CONTENT_TEXT + "</td>" + 191 "<td>" + CONTENT_TEXT + "</td>" +
181 "</tr>" + 192 "</tr>" +
182 "</tbody>" + 193 "</tbody>" +
183 "</table>"; 194 "</table>";
184 195
196 mHead.setInnerHTML("<base href=\"http://example.com/\">");
185 mBody.setInnerHTML(html); 197 mBody.setInnerHTML(html);
186 198
187 ContentExtractor extractor = new ContentExtractor(mRoot); 199 ContentExtractor extractor = new ContentExtractor(mRoot);
188 String extractedContent = extractor.extractContent(); 200 String extractedContent = extractor.extractContent();
189 assertEquals(expected, 201 assertEquals(expected,
190 TestUtil.removeAllDirAttributes(extractedContent)); 202 TestUtil.removeAllDirAttributes(extractedContent));
191 } 203 }
192 204
193 public void testPreserveOrderedList() { 205 public void testPreserveOrderedList() {
194 Element outerListTag = Document.get().createElement("OL"); 206 Element outerListTag = Document.get().createElement("OL");
(...skipping 379 matching lines...) Expand 10 before | Expand all | Expand 10 after
574 586
575 final String htmlArticle = 587 final String htmlArticle =
576 "<h1>" + CONTENT_TEXT + "</h1>" + 588 "<h1>" + CONTENT_TEXT + "</h1>" +
577 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; 589 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>";
578 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; 590 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;
579 591
580 // Non-article schema.org types should not use the fast path. 592 // Non-article schema.org types should not use the fast path.
581 assertExtractor(expected, htmlArticle); 593 assertExtractor(expected, htmlArticle);
582 } 594 }
583 } 595 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698