Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(50)

Side by Side Diff: javatests/org/chromium/distiller/DomUtilTest.java

Issue 1411603004: Discard hidden articles when using fast path (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: Comments addressed & master rebased Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import org.chromium.distiller.webdocument.WebTable; 7 import org.chromium.distiller.webdocument.WebTable;
8 8
9 import com.google.gwt.core.client.JsArray; 9 import com.google.gwt.core.client.JsArray;
10 import com.google.gwt.dom.client.Document; 10 import com.google.gwt.dom.client.Document;
11 import com.google.gwt.dom.client.Element; 11 import com.google.gwt.dom.client.Element;
12 import com.google.gwt.dom.client.Node; 12 import com.google.gwt.dom.client.Node;
13 import com.google.gwt.dom.client.NodeList;
13 14
14 import java.util.Map; 15 import java.util.Map;
15 import java.util.List; 16 import java.util.List;
16 17
17 public class DomUtilTest extends DomDistillerJsTestCase { 18 public class DomUtilTest extends DomDistillerJsTestCase {
19 private static final String CONTENT_TEXT = "Lorem Ipsum Lorem Ipsum Lorem Ipsum.";
20
18 public void testGetAttributes() { 21 public void testGetAttributes() {
19 Element e = Document.get().createDivElement(); 22 Element e = Document.get().createDivElement();
20 e.setInnerHTML("<div style=\"width:50px; height:100px\" id=\"f\" class=\"sdf\"></div>"); 23 e.setInnerHTML("<div style=\"width:50px; height:100px\" id=\"f\" class=\"sdf\"></div>");
21 e = Element.as(e.getChildNodes().getItem(0)); 24 e = Element.as(e.getChildNodes().getItem(0));
22 JsArray<Node> jsAttrs = DomUtil.getAttributes(e); 25 JsArray<Node> jsAttrs = DomUtil.getAttributes(e);
23 assertEquals(3, jsAttrs.length()); 26 assertEquals(3, jsAttrs.length());
24 assertEquals("style", jsAttrs.get(0).getNodeName()); 27 assertEquals("style", jsAttrs.get(0).getNodeName());
25 assertEquals("width:50px; height:100px", jsAttrs.get(0).getNodeValue()); 28 assertEquals("width:50px; height:100px", jsAttrs.get(0).getNodeValue());
26 assertEquals("id", jsAttrs.get(1).getNodeName()); 29 assertEquals("id", jsAttrs.get(1).getNodeName());
27 assertEquals("f", jsAttrs.get(1).getNodeValue()); 30 assertEquals("f", jsAttrs.get(1).getNodeValue());
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
102 div.appendChild(div2); 105 div.appendChild(div2);
103 106
104 Element currDiv = TestUtil.createDiv(3); 107 Element currDiv = TestUtil.createDiv(3);
105 div2.appendChild(currDiv); 108 div2.appendChild(currDiv);
106 Element finalDiv1 = currDiv; 109 Element finalDiv1 = currDiv;
107 110
108 currDiv = TestUtil.createDiv(4); 111 currDiv = TestUtil.createDiv(4);
109 div2.appendChild(currDiv); 112 div2.appendChild(currDiv);
110 currDiv.appendChild(TestUtil.createDiv(5)); 113 currDiv.appendChild(TestUtil.createDiv(5));
111 114
112 assertEquals(div2, DomUtil.getNearestCommonAncestor(finalDiv1, currDiv.getChild(0))); 115 assertEquals(div2, DomUtil.getNearestCommonAncestor(finalDiv1,
116 currDiv.getChild(0)));
117
118 NodeList<Element> nodeList = DomUtil.querySelectorAll(mRoot,
119 "[id=\"3\"],[id=\"5\"]");
120
113 assertEquals(div2, DomUtil.getNearestCommonAncestor( 121 assertEquals(div2, DomUtil.getNearestCommonAncestor(
114 DomUtil.querySelectorAll(mRoot, "[id=\"3\"],[id=\"5\"]"))); 122 TestUtil.nodeListToList(nodeList)));
123
115 } 124 }
116 125
117 /** 126 /**
118 * The tree graph is: 127 * The tree graph is:
119 * 1 - 2 - 3 128 * 1 - 2 - 3
120 */ 129 */
121 public void testNearestCommonAncestorIsRoot() { 130 public void testNearestCommonAncestorIsRoot() {
122 Element div = TestUtil.createDiv(1); 131 Element div = TestUtil.createDiv(1);
123 mBody.appendChild(div); 132 mBody.appendChild(div);
124 133
125 Element div2 = TestUtil.createDiv(2); 134 Element div2 = TestUtil.createDiv(2);
126 div.appendChild(div2); 135 div.appendChild(div2);
127 136
128 Element div3 = TestUtil.createDiv(3); 137 Element div3 = TestUtil.createDiv(3);
129 div2.appendChild(div3); 138 div2.appendChild(div3);
130 139
131 assertEquals(div, DomUtil.getNearestCommonAncestor(div, div3)); 140 assertEquals(div, DomUtil.getNearestCommonAncestor(div, div3));
141
142 NodeList<Element> nodeList = DomUtil.querySelectorAll(mRoot,
143 "[id=\"1\"],[id=\"3\"]");
144
132 assertEquals(div, DomUtil.getNearestCommonAncestor( 145 assertEquals(div, DomUtil.getNearestCommonAncestor(
133 DomUtil.querySelectorAll(mRoot, "[id=\"1\"],[id=\"3\"]"))); 146 TestUtil.nodeListToList(nodeList)));
134 } 147 }
135 148
136 public void testNodeDepth() { 149 public void testNodeDepth() {
137 Element div = TestUtil.createDiv(1); 150 Element div = TestUtil.createDiv(1);
138 151
139 Element div2 = TestUtil.createDiv(2); 152 Element div2 = TestUtil.createDiv(2);
140 div.appendChild(div2); 153 div.appendChild(div2);
141 154
142 Element div3 = TestUtil.createDiv(3); 155 Element div3 = TestUtil.createDiv(3);
143 div2.appendChild(div3); 156 div2.appendChild(div3);
(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after
371 for (int i = 0; i < mBody.getChildCount(); i++) { 384 for (int i = 0; i < mBody.getChildCount(); i++) {
372 DomUtil.stripImageElements(mBody.getChild(i)); 385 DomUtil.stripImageElements(mBody.getChild(i));
373 } 386 }
374 assertEquals(expected, mBody.getInnerHTML()); 387 assertEquals(expected, mBody.getInnerHTML());
375 388
376 mBody.setInnerHTML(html); 389 mBody.setInnerHTML(html);
377 DomUtil.stripImageElements(mBody); 390 DomUtil.stripImageElements(mBody);
378 assertEquals(expected, mBody.getInnerHTML()); 391 assertEquals(expected, mBody.getInnerHTML());
379 } 392 }
380 393
394 public void testIsVisibleByItsOffsetParentDisplayNone() {
395 String html =
396 "<div style=\"display: none;\">" +
397 "<div>Some Text</div>" +
398 "</div>";
399 mBody.setInnerHTML(html);
400 Element child = mBody.getFirstChildElement().getFirstChildElement();
401 assertFalse(DomUtil.isVisibleByItsOffset(child));
402 }
403
404 public void testIsVisibleByItsOffsetChildDisplayNone() {
405 String html =
406 "<div>" +
407 "<div style=\"display: none;\">Some Text</div>" +
408 "</div>";
409 mBody.setInnerHTML(html);
410 Element child = mBody.getFirstChildElement().getFirstChildElement();
411 assertFalse(DomUtil.isVisibleByItsOffset(child));
412 }
413
414 public void testIsVisibleByItsOffsetDisplayBlock() {
415 String html =
416 "<div>" +
417 "<div>Some Text</div>" +
418 "</div>";
419 mBody.setInnerHTML(html);
420 Element child = mBody.getFirstChildElement().getFirstChildElement();
421 assertTrue(DomUtil.isVisibleByItsOffset(child));
422 }
423
424 public void testOnlyProcessArticleElement() {
425 final String htmlArticle =
426 "<h1>" + CONTENT_TEXT + "</h1>" +
wychen 2016/06/02 05:56:01 All these tests should work without putting CONTEN
marcelorcorrea 2016/06/03 16:21:57 We tried using only empty elements but didn't work
wychen 2016/06/03 16:49:53 Ah. So it's about test again. Let's use the conte
marcelorcorrea 2016/06/06 13:17:57 Done.
427 "<article>" + CONTENT_TEXT + "</article>";
428
429 String expected = "<article>" + CONTENT_TEXT + "</article>";
430
431 Element result = getArticleElement(htmlArticle);
432 assertEquals(expected, result.getString());
433 }
434
435 public void testOnlyProcessArticleElementWithHiddenArticleElement() {
436 final String paragraph = "<p>" + CONTENT_TEXT + "</p>" +
437 "<p>" + CONTENT_TEXT + "</p>";
438
439 final String htmlArticle =
440 "<h1>" + CONTENT_TEXT + "</h1>" +
441 "<article>" + paragraph + "</article>" +
442 "<article style=\"display:none\">" + paragraph +"</article>";
443
444 String expected = "<article>" + paragraph + "</article>";
445
446 Element result = getArticleElement(htmlArticle);
447 assertEquals(expected, result.getString());
448 }
449
450 public void testOnlyProcessArticleElementMultiple() {
451 final String htmlArticle =
452 "<h1>" + CONTENT_TEXT + "</h1>" +
453 "<article>" + CONTENT_TEXT + "</article>" +
454 "<article>" + CONTENT_TEXT + "</article>";
455
456 // The existence of multiple articles disables the fast path.
457 assertNull(getArticleElement(htmlArticle));
458 }
459
460 public void testOnlyProcessArticleElementMultipleWithHiddenArticleElement() {
wychen 2016/06/02 05:56:01 This seems a bit redundant.
marcelorcorrea 2016/06/03 16:21:57 Done.
461 final String paragraph = "<p>" + CONTENT_TEXT + "</p><p>" +
462 CONTENT_TEXT + "</p>";
463
464 final String htmlArticle =
465 "<h1>" + CONTENT_TEXT + "</h1>" +
466 "<article>" + paragraph + "</article>" +
467 "<article style=\"display:none\">" + paragraph + "</article>" +
468 "<article>" + paragraph + "</article>";
469
470 // The existence of multiple articles disables the fast path.
471 assertNull(getArticleElement(htmlArticle));
472 }
473
474 public void testOnlyProcessOGArticle() {
wychen 2016/06/02 05:56:01 My bad. I mixed up open graph with schema.org back
marcelorcorrea 2016/06/03 16:21:57 Done.
475 final String paragraph = "<p>" + CONTENT_TEXT + "</p><p>" +
476 CONTENT_TEXT + "</p>";
477
478 final String htmlArticle =
479 "<h1>" + CONTENT_TEXT + "</h1>" +
480 "<div itemscope itemtype=\"http://schema.org/Article\">" +
481 paragraph +
482 "</div>";
483
484 final String expected =
485 "<div itemscope=\"\" " +
486 "itemtype=\"http://schema.org/Article\">" + paragraph +
487 "</div>";
488
489 Element result = getArticleElement(htmlArticle);
490 assertEquals(expected, result.getString());
491 }
492
493 public void testOnlyProcessOGArticleWithHiddenArticleElement() {
494 final String paragraph = "<p>" + CONTENT_TEXT + "</p>" +
495 "<p>" + CONTENT_TEXT + "</p>";
496
497 final String htmlArticle =
498 "<h1>" + CONTENT_TEXT + "</h1>" +
499 "<div itemscope itemtype=\"http://schema.org/Article\">" +
500 paragraph + "</div>" +
501 "<div itemscope itemtype=\"http://schema.org/Article\" " +
502 "style=\"display:none\">" + paragraph +
503 "</div>";
504
505 String expected =
506 "<div itemscope=\"\" itemtype=\"http://schema.org/Article\">" +
507 paragraph +
508 "</div>";
509
510 Element result = getArticleElement(htmlArticle);
511 assertEquals(expected, result.getString());
512 }
513
514 public void testOnlyProcessOGArticleNews() {
515 final String paragraph = "<p>" + CONTENT_TEXT + "</p>" +
516 "<p>" + CONTENT_TEXT + "</p>";
517
518 final String htmlArticle =
519 "<h1>" + CONTENT_TEXT + "</h1>" +
520 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" +
521 paragraph +
522 "</div>";
523
524 final String expected =
525 "<div itemscope=\"\" " +
526 "itemtype=\"http://schema.org/NewsArticle\">" + paragraph +
527 "</div>";
528
529 Element result = getArticleElement(htmlArticle);
530 assertEquals(expected, result.getString());
531 }
532
533 public void testOnlyProcessOGArticleNewsWithHiddenArticleElement() {
wychen 2016/06/02 05:56:01 I think we only need to test one variation of sche
marcelorcorrea 2016/06/03 16:21:57 Done.
534 final String paragraph = "<p>" + CONTENT_TEXT + "</p>" +
535 "<p>" + CONTENT_TEXT + "</p>";
536
537 final String htmlArticle =
538 "<h1>" + CONTENT_TEXT + "</h1>" +
539 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" +
540 paragraph +
541 "</div>" +
542 "<div itemscope itemtype=\"http://schema.org/NewsArticle\" " +
543 "style=\"display:none\">" + paragraph +
544 "</div>";
545
546 String expected =
547 "<div itemscope=\"\" " +
548 "itemtype=\"http://schema.org/NewsArticle\">" + paragraph +
549 "</div>";
550
551 Element result = getArticleElement(htmlArticle);
552 assertEquals(expected, result.getString());
553 }
554
555 public void testOnlyProcessOGArticleBlog() {
556 final String paragraph = "<p>" + CONTENT_TEXT + "</p>" +
557 "<p>" + CONTENT_TEXT + "</p>";
558
559 final String htmlArticle =
560 "<h1>" + CONTENT_TEXT + "</h1>" +
561 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" +
562 paragraph +
563 "</div>";
564
565 final String expected =
566 "<div itemscope=\"\" " +
567 "itemtype=\"http://schema.org/BlogPosting\">" +
568 paragraph +
569 "</div>";
570
571 Element result = getArticleElement(htmlArticle);
572 assertEquals(expected, result.getString());
573 }
574
575 public void testOnlyProcessOGArticleBlogWithHiddenArticleElement() {
wychen 2016/06/02 05:56:01 ditto
marcelorcorrea 2016/06/03 16:21:57 Done.
576 final String paragraph = "<p>" + CONTENT_TEXT + "</p>" +
577 "<p>" + CONTENT_TEXT + "</p>";
578
579 final String htmlArticle =
580 "<h1>" + CONTENT_TEXT + "</h1>" +
581 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" +
582 paragraph +
583 "</div>" +
584 "<div itemscope itemtype=\"http://schema.org/BlogPosting\" " +
585 "style=\"display:none\">" + paragraph +
586 "</div>";
587
588 final String expected =
589 "<div itemscope=\"\" " +
590 "itemtype=\"http://schema.org/BlogPosting\">" +
591 paragraph +
592 "</div>";
593
594 Element result = getArticleElement(htmlArticle);
595 assertEquals(expected, result.getString());
596 }
597
598 public void testOnlyProcessOGArticleNested() {
599 final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
600
601 final String htmlArticle =
602 "<h1>" + CONTENT_TEXT + "</h1>" +
603 "<div itemscope itemtype=\"http://schema.org/Article\">" +
604 paragraph +
605 "<div itemscope itemtype=\"http://schema.org/Article\">" +
606 paragraph +
607 "</div>" +
608 "</div>";
609
610 final String expected =
611 "<div itemscope=\"\" itemtype=\"http://schema.org/Article\">" +
612 paragraph +
613 "<div itemscope=\"\" itemtype=\"http://schema.org/Article\">" +
614 paragraph +
615 "</div>" +
616 "</div>";
617
618 Element result = getArticleElement(htmlArticle);
619 assertEquals(expected, result.getString());
620 }
621
622 public void testOnlyProcessOGArticleNestedWithNestedHiddenArticleElement() {
wychen 2016/06/02 05:56:01 Keep this, since this seems complicated enough.
marcelorcorrea 2016/06/03 16:21:57 Done.
623 final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
624
625 final String htmlArticle =
626 "<h1>" + CONTENT_TEXT + "</h1>" +
627 "<div itemscope itemtype=\"http://schema.org/Article\">" +
628 paragraph +
629 "<div itemscope itemtype=\"http://schema.org/Article\">" +
630 paragraph +
631 "</div>" +
632 "<div itemscope itemtype=\"http://schema.org/Article\" " +
633 "style=\"display:none\">" + paragraph +
634 "</div>" +
635 "</div>";
636
637 final String expected =
638 "<div itemscope=\"\" itemtype=\"http://schema.org/Article\">" +
639 paragraph +
640 "<div itemscope=\"\" itemtype=\"http://schema.org/Article\">" +
641 paragraph + "</div>" +
642 "<div itemscope=\"\" itemtype=\"http://schema.org/Article\" " +
643 "style=\"display:none\">" + paragraph + "</div>" +
644 "</div>";
645
646 Element result = getArticleElement(htmlArticle);
647 assertEquals(expected, result.getString());
648 }
649
650 public void testOnlyProcessOGArticleNestedWithHiddenArticleElement() {
wychen 2016/06/02 05:56:01 Also keep this.
marcelorcorrea 2016/06/03 16:21:57 Done.
651 final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
652
653 final String htmlArticle =
654 "<h1>" + CONTENT_TEXT + "</h1>" +
655 "<div itemscope itemtype=\"http://schema.org/Article\">" +
656 paragraph +
657 "<div itemscope itemtype=\"http://schema.org/Article\">" +
658 paragraph +
659 "</div>" +
660 "</div>" +
661 "<div itemscope itemtype=\"http://schema.org/Article\" " +
662 "style=\"display:none\">" + paragraph + "</div>";
663
664 final String expected =
665 "<div itemscope=\"\" itemtype=\"http://schema.org/Article\">" +
666 paragraph +
667 "<div itemscope=\"\" itemtype=\"http://schema.org/Article\">" +
668 paragraph +
669 "</div>" +
670 "</div>";
671
672 Element result = getArticleElement(htmlArticle);
673 assertEquals(expected, result.getString());
674 }
675
676 public void testOnlyProcessOGNonArticleMovie() {
677 final String paragraph = "<p>" + CONTENT_TEXT + "</p>" +
678 "<p>" + CONTENT_TEXT + "</p>";
679
680 final String htmlArticle =
681 "<h1>" + CONTENT_TEXT + "</h1>" +
682 "<div itemscope itemtype=\"http://schema.org/Movie\">" +
683 paragraph +
684 "</div>";
685
686 // Non-article schema.org types should not use the fast path.
687 Element result = getArticleElement(htmlArticle);
688 assertNull(result);
689 }
690
691 private Element getArticleElement(String html) {
692 mBody.setInnerHTML(html);
693 return DomUtil.getArticleElement(mRoot);
694 }
695
381 public void testGetArea() { 696 public void testGetArea() {
382 String elements = 697 String elements =
383 "<div style=\"width: 200px; height: 100px\">w</div>" + 698 "<div style=\"width: 200px; height: 100px\">w</div>" +
384 "<div style=\"width: 300px;\">" + 699 "<div style=\"width: 300px;\">" +
385 "<div style=\"width: 300px; height: 200px\"></div>" + 700 "<div style=\"width: 300px; height: 200px\"></div>" +
386 "</div>" + 701 "</div>" +
387 "<div style=\"width: 400px; height: 100px\">" + 702 "<div style=\"width: 400px; height: 100px\">" +
388 "<div style=\"height: 100%\"></div>" + 703 "<div style=\"height: 100%\"></div>" +
389 "</div>"; 704 "</div>";
390 mBody.setInnerHTML(elements); 705 mBody.setInnerHTML(elements);
391 706
392 Element element = mBody.getFirstChildElement(); 707 Element element = mBody.getFirstChildElement();
393 assertEquals(200*100, DomUtil.getArea(element)); 708 assertEquals(200*100, DomUtil.getArea(element));
394 709
395 element = element.getNextSiblingElement(); 710 element = element.getNextSiblingElement();
396 assertEquals(300*200, DomUtil.getArea(element)); 711 assertEquals(300*200, DomUtil.getArea(element));
397 712
398 element = element.getNextSiblingElement(); 713 element = element.getNextSiblingElement();
399 assertEquals(400*100, DomUtil.getArea(element)); 714 assertEquals(400*100, DomUtil.getArea(element));
400 715
401 element = element.getFirstChildElement(); 716 element = element.getFirstChildElement();
402 assertEquals(400*100, DomUtil.getArea(element)); 717 assertEquals(400*100, DomUtil.getArea(element));
403 } 718 }
404 } 719 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698