java/org/chromium/distiller/PagingLinksFinder.java - Issue 1725243002: Fix some warnings in Eclipse

Side by Side Diff: java/org/chromium/distiller/PagingLinksFinder.java

Issue 1725243002: Fix some warnings in Eclipse (Closed) Base URL: git@github.com:chromium/dom-distiller.git@master

Patch Set: rebase Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« java/org/chromium/distiller/PageParameterDetector.java ('K') | « java/org/chromium/distiller/PageParameterParser.java ('k') | java/org/chromium/distiller/SchemaOrgParser.java » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved.	1 // Copyright 2014 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 /*	5 /*

6 * Parts of this file are adapted from Readability.	6 * Parts of this file are adapted from Readability.

7 *	7 *

8 * Readability is Copyright (c) 2010 Src90 Inc	8 * Readability is Copyright (c) 2010 Src90 Inc

9 * and licenced under the Apache License, Version 2.0.	9 * and licenced under the Apache License, Version 2.0.

10 */	10 */

11	11

12 package org.chromium.distiller;	12 package org.chromium.distiller;

13	13

14 import org.chromium.distiller.proto.DomDistillerProtos;	14 import org.chromium.distiller.proto.DomDistillerProtos;

15	15

16 import com.google.gwt.dom.client.AnchorElement;	16 import com.google.gwt.dom.client.AnchorElement;

17 import com.google.gwt.dom.client.BaseElement;	17 import com.google.gwt.dom.client.BaseElement;

18 import com.google.gwt.dom.client.Document;	18 import com.google.gwt.dom.client.Document;

19 import com.google.gwt.dom.client.Element;	19 import com.google.gwt.dom.client.Element;

20 import com.google.gwt.dom.client.NodeList;	20 import com.google.gwt.dom.client.NodeList;

21 import com.google.gwt.regexp.shared.RegExp;	21 import com.google.gwt.regexp.shared.RegExp;

22 import com.google.gwt.user.client.Window;

23	22

24 import java.util.ArrayList;

25 import java.util.Arrays;

26 import java.util.Collections;

27 import java.util.HashMap;	23 import java.util.HashMap;

28 import java.util.HashSet;	24 import java.util.HashSet;

29 import java.util.List;

30 import java.util.Map;	25 import java.util.Map;

31 import java.util.Set;	26 import java.util.Set;

32	27

33 /**	28 /**

34 * This class finds the next and previous page links for the distilled document. The functionality	29 * This class finds the next and previous page links for the distilled document. The functionality

35 * for next page links is migrated from readability.getArticleTitle() in chromium codebase's	30 * for next page links is migrated from readability.getArticleTitle() in chromium codebase's

36 * third_party/readability/js/readability.js, and then expanded for previous page links; boilerpipe	31 * third_party/readability/js/readability.js, and then expanded for previous page links; boilerpipe

37 * doesn't have such capability.	32 * doesn't have such capability.

38 * First, it determines the prefix URL of the document. Then, for each anchor in the document, its	33 * First, it determines the prefix URL of the document. Then, for each anchor in the document, its

39 * href and text are compared to the prefix URL and examined for next- or previous-paging-related	34 * href and text are compared to the prefix URL and examined for next- or previous-paging-related

(...skipping 14 matching lines...) Expand all Loading...
54 + "\|tool\|widget",	49 + "\|tool\|widget",

55 "i");	50 "i");

56 private static final RegExp REG_EXTRANEOUS = RegExp.compile(	51 private static final RegExp REG_EXTRANEOUS = RegExp.compile(

57 "print\|archive\|comment\|discuss\|e[\\-]?mail\|share\|reply\|all\|login\|sig n\|single"	52 "print\|archive\|comment\|discuss\|e[\\-]?mail\|share\|reply\|all\|login\|sig n\|single"

58 + "\|as one\|article\|post\|篇",	53 + "\|as one\|article\|post\|篇",

59 "i");	54 "i");

60 private static final RegExp REG_PAGINATION = RegExp.compile("pag(e\|ing\|inat) ", "i");	55 private static final RegExp REG_PAGINATION = RegExp.compile("pag(e\|ing\|inat) ", "i");

61 private static final RegExp REG_LINK_PAGINATION =	56 private static final RegExp REG_LINK_PAGINATION =

62 RegExp.compile("p(a\|g\|ag)?(e\|ing\|ination)?(=\|\\/)[0-9]{1,2}$", "i");	57 RegExp.compile("p(a\|g\|ag)?(e\|ing\|ination)?(=\|\\/)[0-9]{1,2}$", "i");

63 private static final RegExp REG_FIRST_LAST = RegExp.compile("(first\|last)", "i");	58 private static final RegExp REG_FIRST_LAST = RegExp.compile("(first\|last)", "i");

64 // Examples that match PAGE_NUMBER_REGEX are: "_p3", "-pg3", "p3", "_1", "-12-2".

65 // Examples that don't match PAGE_NUMBER_REGEX are: "_p3 ", "p", "p123".

66 private static final RegExp REG_PAGE_NUMBER =

67 RegExp.compile("((_\|-)?p[a-z]*\|(_\|-))[0-9]{1,2}$", "gi");

68

69 private static final RegExp REG_HREF_CLEANER = RegExp.compile("/?(#.*)?$");	59 private static final RegExp REG_HREF_CLEANER = RegExp.compile("/?(#.*)?$");

70 private static final RegExp REG_NUMBER = RegExp.compile("\\d");	60 private static final RegExp REG_NUMBER = RegExp.compile("\\d");

71	61

72 public static DomDistillerProtos.PaginationInfo getPaginationInfo(String ori ginal_url) {	62 public static DomDistillerProtos.PaginationInfo getPaginationInfo(String ori ginal_url) {

73 DomDistillerProtos.PaginationInfo info = DomDistillerProtos.PaginationIn fo.create();	63 DomDistillerProtos.PaginationInfo info = DomDistillerProtos.PaginationIn fo.create();

74 String next = findNext(Document.get().getDocumentElement(), original_url );	64 String next = findNext(Document.get().getDocumentElement(), original_url );

75 if (next != null) {	65 if (next != null) {

76 info.setNextPage(next);	66 info.setNextPage(next);

77 }	67 }

78 return info;	68 return info;

(...skipping 295 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
374	364

375 BaseElement base = doc.createBaseElement();	365 BaseElement base = doc.createBaseElement();

376 base.setHref(base_url);	366 base.setHref(base_url);

377 doc.getHead().appendChild(base);	367 doc.getHead().appendChild(base);

378	368

379 AnchorElement a = doc.createAnchorElement();	369 AnchorElement a = doc.createAnchorElement();

380 doc.getBody().appendChild(a);	370 doc.getBody().appendChild(a);

381 return a;	371 return a;

382 }	372 }

383	373

384 private static String fixMissingScheme(String url) {

385 if (url.isEmpty()) return "";

386 if (!url.contains("://")) return "http://" + url;

387 return url;

388 }

389

390 // The link is resolved using an anchor within a new HTML document with a base tag.	374 // The link is resolved using an anchor within a new HTML document with a base tag.

391 public static String resolveLinkHref(AnchorElement link, AnchorElement baseA nchor) {	375 public static String resolveLinkHref(AnchorElement link, AnchorElement baseA nchor) {

392 String linkHref = link.getAttribute("href");	376 String linkHref = link.getAttribute("href");

393 return resolveLinkHref(linkHref, baseAnchor);	377 return resolveLinkHref(linkHref, baseAnchor);

394 }	378 }

395	379

396 public static String resolveLinkHref(String linkHref, AnchorElement baseAnch or) {	380 public static String resolveLinkHref(String linkHref, AnchorElement baseAnch or) {

397 baseAnchor.setAttribute("href", linkHref);	381 baseAnchor.setAttribute("href", linkHref);

398 return baseAnchor.getHref();	382 return baseAnchor.getHref();

399 }	383 }

400	384

401 private static String getScheme(String url) {	385 private static String getScheme(String url) {

402 return StringUtil.split(url, ":\\/\\/")[0];	386 return StringUtil.split(url, ":\\/\\/")[0];

403 }	387 }

404	388

405 // Port number is also included if it exists.	389 // Port number is also included if it exists.

406 private static String getHostname(String url) {	390 private static String getHostname(String url) {

407 url = StringUtil.split(url, ":\\/\\/")[1];	391 url = StringUtil.split(url, ":\\/\\/")[1];

408 if (!url.contains("/")) return url;	392 if (!url.contains("/")) return url;

409 return StringUtil.split(url, "\\/")[0];	393 return StringUtil.split(url, "\\/")[0];

410 }	394 }

411	395

412 private static String getPath(String url) {

413 url = StringUtil.split(url, ":\\/\\/")[1];

414 if (!url.contains("/")) return "";

415 return StringUtil.findAndReplace(url, "^([^/]*)/", "");

416 }

417

418 public static Integer pageDiff(String url, String linkHref, AnchorElement li nk, int skip) {	396 public static Integer pageDiff(String url, String linkHref, AnchorElement li nk, int skip) {

419 int commonLen = skip;	397 int commonLen = skip;

420 int i;	398 int i;

421 for (i=skip; i<Math.min(url.length(), linkHref.length()); i++) {	399 for (i=skip; i<Math.min(url.length(), linkHref.length()); i++) {

422 if (url.charAt(i) != linkHref.charAt(i)) {	400 if (url.charAt(i) != linkHref.charAt(i)) {

423 break;	401 break;

424 }	402 }

425 }	403 }

426 commonLen = i;	404 commonLen = i;

427 url = url.substring(commonLen, url.length());	405 url = url.substring(commonLen, url.length());

(...skipping 71 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
499 }	477 }

500	478

501 private enum PageLink {	479 private enum PageLink {

502 NEXT,	480 NEXT,

503 PREV,	481 PREV,

504 }	482 }

505	483

506 private static final Map<Element, String> mLinkDebugInfo = new HashMap<Eleme nt, String>();	484 private static final Map<Element, String> mLinkDebugInfo = new HashMap<Eleme nt, String>();

507	485

508 }	486 }

OLD	NEW