Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(967)

Side by Side Diff: javatests/org/chromium/distiller/ContentExtractorTest.java

Issue 1507373003: Clean up attributes of image elements (Closed) Base URL: git@github.com:chromium/dom-distiller.git@master
Patch Set: add todos Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.dom.client.Document; 7 import com.google.gwt.dom.client.Document;
8 import com.google.gwt.dom.client.Element; 8 import com.google.gwt.dom.client.Element;
9 9
10 public class ContentExtractorTest extends DomDistillerJsTestCase { 10 public class ContentExtractorTest extends DomDistillerJsTestCase {
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
79 assertTrue(parser != null); 79 assertTrue(parser != null);
80 assertEquals(MARKUP_PARSER_TITLE, parser.getTitle()); 80 assertEquals(MARKUP_PARSER_TITLE, parser.getTitle());
81 81
82 Document.get().setTitle(TITLE_TEXT); 82 Document.get().setTitle(TITLE_TEXT);
83 83
84 ContentExtractor extractor = new ContentExtractor(mRoot); 84 ContentExtractor extractor = new ContentExtractor(mRoot);
85 assertEquals("OpenGraph title should be picked over document.title", 85 assertEquals("OpenGraph title should be picked over document.title",
86 MARKUP_PARSER_TITLE, extractor.extractTitle()); 86 MARKUP_PARSER_TITLE, extractor.extractTitle());
87 } 87 }
88 88
89 public void testImageWithSrcset() { 89 public void testImage() {
90 // Test the absolute and different kinds of relative URLs for image sources, 90 // Test the absolute and different kinds of relative URLs for image sources,
91 // and also add an extra comma (,) as malformed srcset syntax for robustness. 91 // and also add an extra comma (,) as malformed srcset syntax for robustness.
92 // Also test images in WebImage and WebTable.
93 // TODO(wychen): add images in WebText when it is supported.
92 final String html = 94 final String html =
93 "<h1>" + CONTENT_TEXT + "</h1>" + 95 "<h1>" + CONTENT_TEXT + "</h1>" +
94 "<img src=\"image\" srcset=\"image200 200w, //example.org/image400 400w\">" + 96 "<img id=\"a\" style=\"typo\" align=\"left\" src=\"image\" srcset=\"image200 200w, //example.org/image400 400w\">" +
97 "<img id=\"b\" style=\"align: left\" alt=\"b\" data-dummy=\"c\" src=\"image2\">" +
95 "<table role=\"grid\"><tbody><tr><td>" + 98 "<table role=\"grid\"><tbody><tr><td>" +
96 "<img src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" + 99 "<img id=\"c\" style=\"a\" alt=\"b\" src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" +
100 "<img id=\"d\" style=\"a\" align=\"left\" src=\"/image2\">" +
97 "</td></tr></tbody></table>" + 101 "</td></tr></tbody></table>" +
98 "<p>" + CONTENT_TEXT + "</p>"; 102 "<p>" + CONTENT_TEXT + "</p>";
99 103
100 final String expected = 104 final String expected =
101 "<h1>" + CONTENT_TEXT + "</h1>" + 105 "<h1>" + CONTENT_TEXT + "</h1>" +
102 "<img src=\"http://example.com/path/image\" " + 106 "<img src=\"http://example.com/path/image\" " +
103 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\">" + 107 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\">" +
108 "<img alt=\"b\" src=\"http://example.com/path/image2\">" +
104 "<table role=\"grid\"><tbody><tr><td>" + 109 "<table role=\"grid\"><tbody><tr><td>" +
105 "<img src=\"http://example.com/image\" " + 110 "<img alt=\"b\" src=\"http://example.com/image\" " +
106 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" + 111 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" +
112 "<img src=\"http://example.com/image2\">" +
107 "</td></tr></tbody></table>" + 113 "</td></tr></tbody></table>" +
108 "<p>" + CONTENT_TEXT + "</p>"; 114 "<p>" + CONTENT_TEXT + "</p>";
109 115
110 mHead.setInnerHTML("<base href=\"http://example.com/path/\">"); 116 mHead.setInnerHTML("<base href=\"http://example.com/path/\">");
111 mBody.setInnerHTML(html); 117 mBody.setInnerHTML(html);
112 118
113 ContentExtractor extractor = new ContentExtractor(mRoot); 119 ContentExtractor extractor = new ContentExtractor(mRoot);
114 String extractedContent = extractor.extractContent(); 120 String extractedContent = extractor.extractContent();
115 121
116 assertEquals(expected, 122 assertEquals(expected,
(...skipping 27 matching lines...) Expand all
144 } 150 }
145 151
146 public void testRemoveStyleAttributes() { 152 public void testRemoveStyleAttributes() {
147 String html = 153 String html =
148 "<h1 style=\"font-weight: folder\">" + 154 "<h1 style=\"font-weight: folder\">" +
149 CONTENT_TEXT + 155 CONTENT_TEXT +
150 "</h1>" + 156 "</h1>" +
151 "<p style=\"\">" + 157 "<p style=\"\">" +
152 CONTENT_TEXT + 158 CONTENT_TEXT +
153 "</p>" + 159 "</p>" +
160 "<img style=\"align: left\" src=\"/test.png\">" +
154 "<table style=\"position: absolute\">" + 161 "<table style=\"position: absolute\">" +
155 "<tbody style=\"font-size: 2\">" + 162 "<tbody style=\"font-size: 2\">" +
156 "<tr style=\"z-index: 0\">" + 163 "<tr style=\"z-index: 0\">" +
157 "<th style=\"top: 0px\">" + CONTENT_TEXT + "</th>" + 164 "<th style=\"top: 0px\">" + CONTENT_TEXT +
165 "<img style=\"align: left\" src=\"/test.png\">" +
166 "</th>" +
158 "<th style=\"width: 20px\">" + CONTENT_TEXT + "</th>" + 167 "<th style=\"width: 20px\">" + CONTENT_TEXT + "</th>" +
159 "</tr><tr style=\"left: 0\">" + 168 "</tr><tr style=\"left: 0\">" +
160 "<td style=\"display: block\">" + CONTENT_TEXT + "</td>" + 169 "<td style=\"display: block\">" + CONTENT_TEXT + "</td>" +
161 "<td style=\"color: #123\">" + CONTENT_TEXT + "</td>" + 170 "<td style=\"color: #123\">" + CONTENT_TEXT + "</td>" +
162 "</tr>" + 171 "</tr>" +
163 "</tbody>" + 172 "</tbody>" +
164 "</table>"; 173 "</table>";
165 174
166 final String expected = 175 final String expected =
167 "<h1>" + 176 "<h1>" +
168 CONTENT_TEXT + 177 CONTENT_TEXT +
169 "</h1>" + 178 "</h1>" +
170 "<p>" + 179 "<p>" +
171 CONTENT_TEXT + 180 CONTENT_TEXT +
172 "</p>" + 181 "</p>" +
182 "<img src=\"http://example.com/test.png\">" +
173 "<table>" + 183 "<table>" +
174 "<tbody>" + 184 "<tbody>" +
175 "<tr>" + 185 "<tr>" +
176 "<th>" + CONTENT_TEXT + "</th>" + 186 "<th>" + CONTENT_TEXT +
187 "<img src=\"http://example.com/test.png\">" +
188 "</th>" +
177 "<th>" + CONTENT_TEXT + "</th>" + 189 "<th>" + CONTENT_TEXT + "</th>" +
178 "</tr><tr>" + 190 "</tr><tr>" +
179 "<td>" + CONTENT_TEXT + "</td>" + 191 "<td>" + CONTENT_TEXT + "</td>" +
180 "<td>" + CONTENT_TEXT + "</td>" + 192 "<td>" + CONTENT_TEXT + "</td>" +
181 "</tr>" + 193 "</tr>" +
182 "</tbody>" + 194 "</tbody>" +
183 "</table>"; 195 "</table>";
184 196
197 mHead.setInnerHTML("<base href=\"http://example.com/\">");
185 mBody.setInnerHTML(html); 198 mBody.setInnerHTML(html);
186 199
187 ContentExtractor extractor = new ContentExtractor(mRoot); 200 ContentExtractor extractor = new ContentExtractor(mRoot);
188 String extractedContent = extractor.extractContent(); 201 String extractedContent = extractor.extractContent();
189 assertEquals(expected, 202 assertEquals(expected,
190 TestUtil.removeAllDirAttributes(extractedContent)); 203 TestUtil.removeAllDirAttributes(extractedContent));
191 } 204 }
192 205
193 public void testPreserveOrderedList() { 206 public void testPreserveOrderedList() {
194 Element outerListTag = Document.get().createElement("OL"); 207 Element outerListTag = Document.get().createElement("OL");
(...skipping 379 matching lines...) Expand 10 before | Expand all | Expand 10 after
574 587
575 final String htmlArticle = 588 final String htmlArticle =
576 "<h1>" + CONTENT_TEXT + "</h1>" + 589 "<h1>" + CONTENT_TEXT + "</h1>" +
577 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; 590 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>";
578 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; 591 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;
579 592
580 // Non-article schema.org types should not use the fast path. 593 // Non-article schema.org types should not use the fast path.
581 assertExtractor(expected, htmlArticle); 594 assertExtractor(expected, htmlArticle);
582 } 595 }
583 } 596 }
OLDNEW
« no previous file with comments | « java/org/chromium/distiller/webdocument/WebText.java ('k') | javatests/org/chromium/distiller/DomUtilTest.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698