Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(277)

Side by Side Diff: java/org/chromium/distiller/DomUtil.java

Issue 1705123002: Add support for Schema.org/Recipe Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: activate only for English pages Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.core.client.JsArray; 7 import com.google.gwt.core.client.JsArray;
8 import com.google.gwt.core.client.JsArrayString; 8 import com.google.gwt.core.client.JsArrayString;
9 import com.google.gwt.dom.client.AnchorElement; 9 import com.google.gwt.dom.client.AnchorElement;
10 import com.google.gwt.dom.client.Document; 10 import com.google.gwt.dom.client.Document;
11 import com.google.gwt.dom.client.Element; 11 import com.google.gwt.dom.client.Element;
12 import com.google.gwt.dom.client.ImageElement; 12 import com.google.gwt.dom.client.ImageElement;
13 import com.google.gwt.dom.client.Node; 13 import com.google.gwt.dom.client.Node;
14 import com.google.gwt.dom.client.NodeList; 14 import com.google.gwt.dom.client.NodeList;
15 import com.google.gwt.dom.client.Style; 15 import com.google.gwt.dom.client.Style;
16 import com.google.gwt.dom.client.VideoElement; 16 import com.google.gwt.dom.client.VideoElement;
17 import com.google.gwt.http.client.URL; 17 import com.google.gwt.http.client.URL;
18 import com.google.gwt.regexp.shared.MatchResult;
19 import com.google.gwt.regexp.shared.RegExp;
18 20
19 import java.util.ArrayList; 21 import java.util.ArrayList;
20 import java.util.HashMap; 22 import java.util.HashMap;
21 import java.util.List; 23 import java.util.List;
22 import java.util.Map; 24 import java.util.Map;
23 25
24 public class DomUtil { 26 public class DomUtil {
25 /** 27 /**
26 * GWT does not provide a way to get a list of all attributes that have been explicitly set on a 28 * GWT does not provide a way to get a list of all attributes that have been explicitly set on a
27 * DOM element (only a way to query the value of a particular attribute). In javascript, this 29 * DOM element (only a way to query the value of a particular attribute). In javascript, this
(...skipping 401 matching lines...) Expand 10 before | Expand all | Expand 10 after
429 public void exit(Node n) { 431 public void exit(Node n) {
430 } 432 }
431 433
432 @Override 434 @Override
433 public void skip(Element e) { 435 public void skip(Element e) {
434 } 436 }
435 }).walk(root); 437 }).walk(root);
436 return nodes; 438 return nodes;
437 } 439 }
438 440
441 public static String formatDuration(String duration) {
442 RegExp pattern = RegExp.compile("^P(?:([0-9]+)Y)?" +
443 "(?:([0-9]+)M)?(?:([0-9]+)W)?(?:([0-9]+)D)?" +
444 "(T(?:([0-9]+)H)?(?:([0-9]+)M)?(?:([0-9]+)S)?)?$", "i");
445 MatchResult matchResult = pattern.exec(duration);
446 List<String> result = new ArrayList<>();
447 if (matchResult != null) {
448 if (matchResult.getGroup(1) != null) {
449 result.add(matchResult.getGroup(1) + " year(s)");
450 }
451 if (matchResult.getGroup(2) != null) {
452 result.add(matchResult.getGroup(2) + " month(s)");
453 }
454 if (matchResult.getGroup(3) != null) {
455 result.add(matchResult.getGroup(3) + " week(s)");
456 }
457 if (matchResult.getGroup(4) != null) {
458 result.add(matchResult.getGroup(4) + " day(s)");
459 }
460 if (matchResult.getGroup(6) != null) {
461 result.add(matchResult.getGroup(6) + " hour(s)");
462 }
463 if (matchResult.getGroup(7) != null) {
464 result.add(matchResult.getGroup(7) + " minute(s)");
465 }
466 if (matchResult.getGroup(8) != null) {
467 result.add(matchResult.getGroup(8) + " second(s)");
468 }
469 }
470 return join(result.toArray(), " ");
471 }
472
473 /**
474 * Tries to get the language of the web page. It looks for
475 * the 'lang' attribute in the HTML tag, if it doesn't find it looks
476 * the meta tags for Content-Language or Language properties.
477 *
478 * @param root The root element.
479 * @return A string containing the language(s) or empty.
480 */
481 public static String getLanguage(Element root) {
wychen 2016/05/31 21:56:34 If the language is specified in http header, inste
dalmirsilva 2016/07/06 17:53:34 Unfortunately it doesn't get it. We couldn't find
wychen 2016/07/24 23:06:33 Got it. I guess this is our technical limitation.
482 String language = root.getLang();
483 if (language.isEmpty()) {
484 NodeList<Element> metas = root.getElementsByTagName("META");
wychen 2016/05/31 21:56:34 Using "META[HTTP-EQUIV="content-language" i][CONTE
dalmirsilva 2016/07/06 17:53:34 Done.
485 for (int i = 0; i < metas.getLength(); i++) {
486 Element meta = metas.getItem(i);
487 if (meta.getAttribute("HTTP-EQUIV").toUpperCase()
488 .equals("CONTENT-LANGUAGE") ||
489 meta.getAttribute("NAME").toUpperCase()
490 .equals("LANGUAGE")) {
491 language = meta.getAttribute("CONTENT");
492 break;
493 }
494 }
495 }
496 return language;
497 }
498
439 public static int getArea(Element e) { 499 public static int getArea(Element e) {
440 if (e != null) { 500 if (e != null) {
441 return e.getOffsetHeight() * e.getOffsetWidth(); 501 return e.getOffsetHeight() * e.getOffsetWidth();
442 } 502 }
443 return 0; 503 return 0;
444 } 504 }
445 505
446 /** 506 /**
447 * Generate HTML/text output for a given node tree/subtree. This will ignore hidden 507 * Generate HTML/text output for a given node tree/subtree. This will ignore hidden
448 * elements. 508 * elements.
(...skipping 15 matching lines...) Expand all
464 return l.querySelectorAll(selectors); 524 return l.querySelectorAll(selectors);
465 }-*/; 525 }-*/;
466 526
/**
 * Creates a new HTML document by calling {@code doc.implementation.createHTMLDocument()} with
 * no arguments (JSNI).
 *
 * @param doc The document whose {@code implementation} object is used to create the new one.
 * @return The newly created document.
 */
467 public static native Document createHTMLDocument(Document doc) /*-{ 527 public static native Document createHTMLDocument(Document doc) /*-{
468 return doc.implementation.createHTMLDocument(); 528 return doc.implementation.createHTMLDocument();
469 }-*/; 529 }-*/;
470 530
/**
 * Returns the {@code firstElementChild} property of the given document (JSNI).
 * NOTE(review): presumably this is the document's root element -- confirm against callers.
 *
 * @param document The document to query.
 * @return The value of {@code document.firstElementChild}.
 */
471 public static native Element getFirstElementChild(Document document) /*-{ 531 public static native Element getFirstElementChild(Document document) /*-{
472 return document.firstElementChild; 532 return document.firstElementChild;
473 }-*/; 533 }-*/;
534
535 public static native String join(Object[] list, String conjunction) /*-{
536 return list.join(conjunction);
537 }-*/;
474 } 538 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698