java/org/chromium/distiller/PageParameterParser.java - Issue 1178633002: implement parser for new pagination algorithm

Side by Side Diff: java/org/chromium/distiller/PageParameterParser.java

Issue 1178633002: implement parser for new pagination algorithm (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master

Patch Set: addr chris's comments Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2015 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 package org.chromium.distiller;

	6

	7 import org.chromium.distiller.proto.DomDistillerProtos;

	8 import org.chromium.distiller.proto.DomDistillerProtos.TimingInfo;

	9

	10 import com.google.gwt.dom.client.AnchorElement;

	11 import com.google.gwt.dom.client.Document;

	12 import com.google.gwt.dom.client.Element;

	13 import com.google.gwt.dom.client.Node;

	14 import com.google.gwt.dom.client.NodeList;

	15 import com.google.gwt.dom.client.Style;

	16 import com.google.gwt.regexp.shared.MatchResult;

	17 import com.google.gwt.regexp.shared.RegExp;

	18

	19 /**

	20 * Background:

	21 * A long article/news/forum thread/blog document may be partitioned into several partial pages

	22 * by the webmaster. Each partial page has outlinks pointing to the adjacent partial pages. The

	23 * anchor text of those outlinks is numeric.

	24 *

	25 * This class parses the document to collect groups of adjacent plain text numbers and outlinks with

	26 * numeric anchor text. These are then passed to PageParameterDetector which would spit out the

	27 * pagination URLs if available.

	28 */

	29 public class PageParameterParser {

	30 // If the numeric value of a link's anchor text is greater than this number, we don't think it

	31 // represents the page number of the link.

	32 private static final int MAX_NUM_FOR_PAGE_PARAM = 100;

	33

	34 /**

	35 * Stores PageParamInfo.PageInfo and the anchor's text, specifically returne d by

	36 * getPageInfoAndText().

	37 */

	38 private static class PageInfoAndText {

	39 private final PageParamInfo.PageInfo mPageInfo;

	40 private final String mText;

	41

	42 PageInfoAndText(int number, String url, String text) {

	43 mPageInfo = new PageParamInfo.PageInfo(number, url);

	44 mText = text;

	45 }

	46 }

	47

	48 /**

	49 * Entry point for PageParameterParser.

	50 * Parses the document to collect outlinks with numeric anchor text and numeric text around

	51 * them. These are then passed to PageParameterDetector to detect pagination URLs.

	52 *

	53 * @return PageParamInfo (see PageParamInfo.java), always. If no page param eter is detected or

	54 * determined to be best, its mType is PageParamInfo.Type.UNSET.

	55 *

	56 * @param originalUrl the original URL of the document to be parsed.

	57 * @param timingInfo for tracking performance.

	58 */

	59 public static PageParamInfo parse(String originalUrl, TimingInfo timingInfo) {

	60 PageParameterParser parser = new PageParameterParser(timingInfo);

	61 return parser.parseDocument(Document.get().getDocumentElement(), origina lUrl);

	62 }

	63

	64 private final TimingInfo mTimingInfo;

	65 private String mDocUrl = "";

	66 private ParsedUrl mParsedUrl = null;

	67 private final MonotonicPageInfosGroups mAdjacentNumbersGroups = new Monotoni cPageInfosGroups();

	68

	69 private static RegExp sHrefCleaner = null;

	70

	/**
	 * Private: instances are created by the static parse() entry point.
	 *
	 * @param timingInfo receives the timing entries recorded by parseDocument().
	 */
	private PageParameterParser(TimingInfo timingInfo) {
	    this.mTimingInfo = timingInfo;
	}

	74

	75 /**

	76 * Actually implements PageParameterParser.parse(); see the description of parse() above.

	77 */

	78 private PageParamInfo parseDocument(Element root, String originalUrl) {

	79 double startTime = DomUtil.getTime();

	80

	81 mDocUrl = originalUrl;

	82 mParsedUrl = ParsedUrl.create(mDocUrl);

	83 if (mParsedUrl == null) return new PageParamInfo(); // Invalid document URL.

	84

	85 AnchorElement baseAnchor = PagingLinksFinder.createAnchorWithBase(

	86 PagingLinksFinder.getBaseUrlForRelative(root, originalUrl));

	87

	88 NodeList<Element> allLinks = root.getElementsByTagName("A");

	89 int idx = 0;

	90 while (idx < allLinks.getLength()) {

	91 final AnchorElement link = AnchorElement.as(allLinks.getItem(idx));

	92 PageInfoAndText pageInfoAndText = getPageInfoAndText(link, baseAncho r);

	93 if (pageInfoAndText == null) {

	94 idx++;

	95 continue;

	96 }

	97

	98 // This link is a good candidate for pagination.

	99

	100 // Close current group of adjacent numbers, add a new group if neces sary.

	101 mAdjacentNumbersGroups.addGroup();

	102

	103 // Before we append the link to the new group of adjacent numbers, c heck if it's

	104 // preceded by a sibling with text; if so, add it before the link.

	105 Node parentWrapper = null;
	cjhopman 2015/07/29 01:07:53 What's this parent wrapper? I don't recall that be What's this parent wrapper? I don't recall that being in the previous changes. I don't understand what it is for. kuan 2015/07/30 16:47:00 i had it in the previous change, and attempted to Show quoted text On 2015/07/29 01:07:53, cjhopman wrote: > What's this parent wrapper? I don't recall that being in the previous changes. I > don't understand what it is for. i had it in the previous change, and attempted to explain it @107 (now 108); the latest patch does include some fixes for urls in new dataset. some urls wrap its pagination links in a div and/or span. such links will not have siblings; instead their parent wrappers would have siblings that contain the pagination links. i call it a parent wrapper because it's the topmost parent that simply wraps the link without adding extra text. if i don't get this parent wrapper, i'd not get the adjacent text before or after the link it contains. cjhopman 2015/08/04 21:58:41 But why the parent wrapper thing? Why not just wal Show quoted text On 2015/07/30 16:47:00, kuan wrote: > On 2015/07/29 01:07:53, cjhopman wrote: > > What's this parent wrapper? I don't recall that being in the previous changes. > I > > don't understand what it is for. > > i had it in the previous change, and attempted to explain it @107 (now 108); the > latest patch does include some fixes for urls in new dataset. > some urls wrap its pagination links in a div and/or span. such links will not > have siblings; instead their parent wrappers would have siblings that contain > the pagination links. i call it a parent wrapper because it's the topmost > parent that simply wraps the link without adding extra text. if i don't get > this parent wrapper, i'd not get the adjacent text before or after the link it > contains. But why the parent wrapper thing? Why not just walk backwards/forwards in the tree until you hit a bad node (non-number text/anchor basically)? 
This parent wrapper approach just seems wrong and complicated and fails on a lot of cases that I think we would want to accept. kuan 2015/08/04 22:38:37 what do u mean by "backwards/forwards in the tree" Show quoted text On 2015/08/04 21:58:41, cjhopman wrote: > On 2015/07/30 16:47:00, kuan wrote: > > On 2015/07/29 01:07:53, cjhopman wrote: > > > What's this parent wrapper? I don't recall that being in the previous > changes. > > I > > > don't understand what it is for. > > > > i had it in the previous change, and attempted to explain it @107 (now 108); > the > > latest patch does include some fixes for urls in new dataset. > > some urls wrap its pagination links in a div and/or span. such links will not > > have siblings; instead their parent wrappers would have siblings that contain > > the pagination links. i call it a parent wrapper because it's the topmost > > parent that simply wraps the link without adding extra text. if i don't get > > this parent wrapper, i'd not get the adjacent text before or after the link it > > contains. > > But why the parent wrapper thing? Why not just walk backwards/forwards in the > tree until you hit a bad node (non-number text/anchor basically)? This parent > wrapper approach just seems wrong and complicated and fails on a lot of cases > that I think we would want to accept. what do u mean by "backwards/forwards in the tree"? breadth-wise or depth-wise? getting previous/next siblings is already walking breadth-wise, which yields nothing. hence, i hv to do upwards (i.e. depth-wise) to look for the parent wrapper. e.g. at http://www.doctoroz.com/article/andrew-weil-5-health-essentials (in page-links-golden-data.sstable), each pagination link is a child of a <li>. the pagination <a> has no sibling. if i don't get its parent wrapper i.e. the <li>, i won't know all the pagination links are adjacent to each other. kuan 2015/08/11 19:09:38 Done. 
Show quoted text On 2015/08/04 22:38:37, kuan wrote: > On 2015/08/04 21:58:41, cjhopman wrote: > > On 2015/07/30 16:47:00, kuan wrote: > > > On 2015/07/29 01:07:53, cjhopman wrote: > > > > What's this parent wrapper? I don't recall that being in the previous > > changes. > > > I > > > > don't understand what it is for. > > > > > > i had it in the previous change, and attempted to explain it @107 (now 108); > > the > > > latest patch does include some fixes for urls in new dataset. > > > some urls wrap its pagination links in a div and/or span. such links will > not > > > have siblings; instead their parent wrappers would have siblings that > contain > > > the pagination links. i call it a parent wrapper because it's the topmost > > > parent that simply wraps the link without adding extra text. if i don't get > > > this parent wrapper, i'd not get the adjacent text before or after the link > it > > > contains. > > > > But why the parent wrapper thing? Why not just walk backwards/forwards in the > > tree until you hit a bad node (non-number text/anchor basically)? This parent > > wrapper approach just seems wrong and complicated and fails on a lot of cases > > that I think we would want to accept. > > what do u mean by "backwards/forwards in the tree"? breadth-wise or depth-wise? > getting previous/next siblings is already walking breadth-wise, which yields > nothing. hence, i hv to do upwards (i.e. depth-wise) to look for the parent > wrapper. > e.g. at http://www.doctoroz.com/article/andrew-weil-5-health-essentials (in > page-links-golden-data.sstable), each pagination link is a child of a <li>. the > pagination <a> has no sibling. if i don't get its parent wrapper i.e. the <li>, > i won't know all the pagination links are adjacent to each other. Done.
	106 if (!checkForPrevSiblingWithText(link)) { // Link has no sibling.

	107 // The link could be a child of a parent that is simply a wrappe r, i.e. with no

	108 // extra text, in which case, we should be checking the siblings of the topmost

	109 // parent wrapper.

	110 parentWrapper = findParentWrapper(link, pageInfoAndText.mText.le ngth());

	111 if (parentWrapper != null) checkForPrevSiblingWithText(parentWra pper);

	112 }

	113

	114 // Add the link to the current group of adjacent numbers.

	115 mAdjacentNumbersGroups.addPageInfo(pageInfoAndText.mPageInfo);

	116

	117 // Add all following siblings with numeric text, with or without lin ks.

	118 int numLinksAdded = 0;

	119 if (parentWrapper == null)

	120 numLinksAdded = addFollowingSiblings(link, false, baseAnchor);

	121 else

	122 numLinksAdded = addFollowingSiblings(parentWrapper, true, baseAn chor);

	123

	124 // Skip the current link and links already processed in addFollowing Siblings().

	125 idx += 1 + numLinksAdded;

	126 } // while there're links.

	127

	128 mAdjacentNumbersGroups.cleanup();

	129

	130 LogUtil.addTimingInfo(startTime, mTimingInfo, "PageParameterParser");

	131

	132 startTime = DomUtil.getTime();

	133 PageParamInfo info = PageParameterDetector.detect(mAdjacentNumbersGroups , mDocUrl);

	134 LogUtil.addTimingInfo(startTime, mTimingInfo, "PageParameterDetector");

	135 return info;

	136 }

	137

	138

	139 /**

	140 * @return a populated PageInfoAndText if given link is to be added to mAdja centNumbersGroups.

	141 * Otherwise, returns null if link is to be ignored.

	142 * "javascript:void" links with numeric text are considered valid links to b e added.

	143 *

	144 * @param link to process.

	145 * @param baseAnchor created for the current document.

	146 */

	147 private PageInfoAndText getPageInfoAndText(AnchorElement link, AnchorElement baseAnchor) {

	148 // Ignore invisible links.

	149 int width = link.getOffsetWidth();

	150 int height = link.getOffsetHeight();

	151 if (width == 0 \|\| height == 0 \|\| !DomUtil.isVisible(link)) return null;
	cjhopman 2015/07/29 01:07:53: It seems odd that invisible links are handled here. Does that mean that in a sequence of numbered links we treat an invisible link the same as one that is visible but with non-number text?
	kuan 2015/07/30 16:47:00: On 2015/07/29 01:07:53, cjhopman wrote: > It seems odd that invisible links are handled here. Does that mean that in a > sequence of numbered links we treat an invisible link the same as one that is > visible but with non-number text? Invisible links need to be ignored. I do this here because this function is where I decide if a link is to be considered for possible pagination. I guess the answer to your question would be yes - an invisible link should be ignored, as is a link with non-number text.
	152

	153 String linkHref = PagingLinksFinder.resolveLinkHref(link, baseAnchor);

	154 boolean isVoidLink = isVoidHref(linkHref);

	155 ParsedUrl url = ParsedUrl.create(linkHref);

	156 if (url == null \|\| (!isVoidLink && !url.getHost().equalsIgnoreCase(mPars edUrl.getHost()))) {

	157 return null;

	158 }

	159

	160 url.setHash("");

	161

	162 // Use javascript innerText (instead of javascript textContent) to only get visible text.

	163 String linkText = DomUtil.getInnerText(link);

	164 int number = linkTextToNumber(linkText);

	165 if (!isPlainPageNumber(number)) return null;

	166

	167 if (isVoidLink \|\| isDisabledLink(link)) return new PageInfoAndText(numbe r, "", linkText);

	168

	169 if (sHrefCleaner == null) sHrefCleaner = RegExp.compile("/?(#.*)?$");

	170 return new PageInfoAndText(number, sHrefCleaner.replace(url.toString(), ""), linkText);

	171 }

	172

	173 /**

	174 * Checks for previous sibling with word text. If the text contains digit(s ) as terms that

	175 * form a valid page number, the sibling is added to the current group of ad jacent numbers.

	176 * Otherwise, the current group of adjacent numbers is closed to end the cur rent adjacency, and

	177 * a new group is started.

	178 *

	179 * @return true if given start node has at least 1 sibling, false otherwise.

	180

	181 * @param start node to start checking with.

	182 */

	183 private boolean checkForPrevSiblingWithText(Node start) {
	cjhopman 2015/07/29 01:07:53 I'm having difficulty understanding both the way t I'm having difficulty understanding both the way that previous number siblings are found and the way that next number siblings are found. I would expect it to look something like: node = getPrevNode(node); // or node = getNextNode(node); if node is text: if is number: add it elif is non-number text: done elif is anchor: // the prevNode case doesn't need this one if is number: add it else: done kuan 2015/07/30 16:47:00 i initially had the check for previous and next nu Show quoted text On 2015/07/29 01:07:53, cjhopman wrote: > I'm having difficulty understanding both the way that previous number siblings > are found and the way that next number siblings are found. > > I would expect it to look something like: > > node = getPrevNode(node); // or node = getNextNode(node); > if node is text: > if is number: > add it > elif is non-number text: > done > elif is anchor: // the prevNode case doesn't need this one > if is number: > add it > else: > done > i initially had the check for previous and next number siblings in one function, but split it when i changed the algo as per ur suggestion in your previous review, because of the different terminating conditions and the type of siblings processed. 1) checking for previous number siblings stops when the sibling is a link/document, or contains text, or there's no more sibling. 2) checking for next number siblings stops when the sibling is a document, or there's no more sibling - links are specifically handled here. more often than not, (2) would theoretically take more iterations than (1). the algo is further complicated by the parent wrapper consideration, and more variations in urls in new dataset.
	184 Node node = start;

	185 Node prevNode = null;

	186 String text = "";

	187 // Find the first previous sibling that has inner text with words.

	188 do {

	189 prevNode = node;

	190 node = node.getPreviousSibling();

	191 if (node == null && prevNode == start) return false;

	192 if (node == null \|\| node.getNodeType() == Node.DOCUMENT_NODE) return true;

	193

	194 if (node.getNodeType() == Node.TEXT_NODE) {

	195 text = node.getNodeValue();

	196 } else {

	197 Element e = Element.as(node);

	198 // Previous link siblings or children have already been processe d.

	199 if (e.hasTagName("A") \|\| e.getElementsByTagName("A").getLength() > 0) return true;

	200 text = DomUtil.getInnerText(e);

	201 }

	202 } while (text.isEmpty() \|\| StringUtil.countWords(text) == 0);

	203

	204 addNumberText(text);

	205 return true;

	206 }

	207

	208 /**

	209 * Adds all following siblings (links and non-links) with numeric text. If the text contains

	210 * digit(s) as terms that form a valid page number, the sibling is added to the current group of

	211 * adjacent numbers. Otherwise, the current group of adjacent numbers is cl osed to end the

	212 * current adjacency, and a new group is started.

	213 *

	214 * @return number of links added.

	215

	216 * @param start node to start checking with.

	217 * @param isParentWrapper true if given start node is a parent wrapper of a link.

	218 * @param baseAnchor created for the current document.

	219 */

	220 private int addFollowingSiblings(Node start, boolean isParentWrapper,

	221 AnchorElement baseAnchor) {

	222 Node node = start;

	223 Node prevNode = null;

	224 String text = "";

	225 int numLinksProcessed = 0;

	226 // Find all following siblings, add them if their text is purely numeric .

	227 while (true) {

	228 prevNode = node;

	229 node = node.getNextSibling();

	230 if (node == null \|\| node.getNodeType() == Node.DOCUMENT_NODE) return numLinksProcessed;

	231

	232 boolean handled = false;

	233 if (node.getNodeType() == Node.TEXT_NODE) {

	234 text = node.getNodeValue();

	235 } else {

	236 Element e = Element.as(node);

	237 if (e.hasTagName("A")) {

	238 addValidLink(AnchorElement.as(e), baseAnchor);

	239 numLinksProcessed++;

	240 handled = true;

	241 } else if (isParentWrapper) {

	242 NodeList<Element> linkChildren = e.getElementsByTagName("A") ;

	243 final int numChildren = linkChildren.getLength();

	244 for (int i = 0; i < numChildren; i++) {

	245 addValidLink(AnchorElement.as(linkChildren.getItem(i)), baseAnchor);

	246 numLinksProcessed++;

	247 }

	248 if (numChildren > 0) handled = true;

	249 }

	250

	251 text = handled ? "" : DomUtil.getInnerText(e);

	252 }

	253

	254 if (!text.isEmpty() && StringUtil.countWords(text) > 0) addNumberTex t(text);

	255 }

	256 }

	257

	258 private static RegExp sTermsRegExp = null; // Match terms i.e. words.

	259 private static RegExp sSurroundingDigitsRegExp = null; // Match term with o nly digits.

	260

	261 /**

	262 * Add PageParamInfo.PageInfo for a non-link with numeric text.
	cjhopman 2015/07/29 01:07:53: It looks like the text doesn't have to be strictly numeric. Is that correct? Could you expand here on what sort of text is accepted/rejected?
	kuan 2015/07/30 16:47:00: On 2015/07/29 01:07:53, cjhopman wrote: > It looks like the text doesn't have to be strictly numeric. Is that correct? > Could you expand here on what sort of text is accepted/rejected? Done. Renamed the function too.
	263 */

	264 private void addNumberText(String text) {

	265 if (!StringUtil.containsDigit(text)) {

	266 // The sibling does not contain valid number(s); if necessary, curre nt group of adjacent

	267 // numbers should be closed, adding a new group if possible.

	268 mAdjacentNumbersGroups.addGroup();

	269 return;

	270 }

	271

	272 if (sTermsRegExp == null) {

	273 sTermsRegExp = RegExp.compile("(\\S[\\w\u00C0-\u1FFF\u2C00-\uD7FF]\ \S)", "gi");

	274 } else {

	275 sTermsRegExp.setLastIndex(0);

	276 }

	277 if (sSurroundingDigitsRegExp == null) {

	278 sSurroundingDigitsRegExp = RegExp.compile("^[\\W_](\\d+)[\\W_]$", "i");

	279 }

	280

	281 // Extract terms from the text, differentiating between those that conta in only digits and

	282 // those that contain non-digits.

	283 while (true) {

	284 MatchResult match = sTermsRegExp.exec(text);

	285 if (match == null) break;

	286 if (match.getGroupCount() <= 1) continue;

	287

	288 String term = match.getGroup(1);

	289 MatchResult termWithDigits = sSurroundingDigitsRegExp.exec(term);

	290 int number = -1;

	291 if (termWithDigits != null && termWithDigits.getGroupCount() > 1) {

	292 number = StringUtil.toNumber(termWithDigits.getGroup(1));

	293 }

	294 if (isPlainPageNumber(number)) {

	295 // This sibling is a valid candidate of plain text page number, add it to last

	296 // group of adjacent numbers.

	297 mAdjacentNumbersGroups.addNumber(number, "");

	298 } else {

	299 // The sibling is not a valid number, so current group of adjace nt numbers

	300 // should be closed, adding a new group if possible.

	301 mAdjacentNumbersGroups.addGroup();

	302 }

	303 } // while there're matches

	304 }

	305

	306 /**

	307 * Add PageParamInfo.PageInfo for a link if its text is numeric.

	308 */

	309 private void addValidLink(AnchorElement link, AnchorElement baseAnchor) {
	cjhopman 2015/07/29 01:07:53: Probably rename this to addLinkIfValid() since it doesn't always add the link. But really, it seems odd that this might not add the link, but then it doesn't tell the caller whether it added it or not. The callers don't care if it is added or not? Or is it that they will only be passing valid links that will always get added?
	kuan 2015/07/30 16:47:00: On 2015/07/29 01:07:53, cjhopman wrote: > probably rename this to addLinkIfValid() since it doesn't always add the link. > > but really, it seems odd that this might not add the link, but then it doesn't > tell the caller whether it added it or not. The callers don't care if it is > added or not? Or is it that they will only be passing valid links that will > always get added? Done. This function was created simply to prevent duplication, because it's called twice when checking for next number siblings. The caller doesn't care if the link is added; it just cares that the link has been processed.
	310 PageInfoAndText pageInfoAndText = getPageInfoAndText(link, baseAnchor);

	311 if (pageInfoAndText != null) mAdjacentNumbersGroups.addPageInfo(pageInfo AndText.mPageInfo);

	312 }

	313

	314 /**

	315 * @return the topmost parent of the given node that simply wraps the node, i.e. with no more

	316 * inner text than that of given node.

	317 */

	318 private static Node findParentWrapper(Node node, int nodeTextLen) {

	319 Node parent = node;

	320 Node prevParent = null;

	321 // While keeping track of each parent, once we find the first one that h as more text than

	322 // given node, the previous parent would be what we want.

	323 do {

	324 prevParent = parent;

	325 parent = parent.getParentNode();

	326 } while (parent != null && DomUtil.getInnerText(parent).length() == node TextLen);

	327

	328 return prevParent == node \|\| prevParent.getNodeType() == Node.DOCUMENT_N ODE ?

	329 null : prevParent;

	330 }

	331

	332 /**

	333 * @return true if link is disabled i.e. not clickable because it has a text cursor.

	334 */

	335 private static boolean isDisabledLink(AnchorElement link) {

	336 Style style = DomUtil.getComputedStyle(link);

	337 return Style.Cursor.valueOf(style.getCursor().toUpperCase()) == Style.Cu rsor.TEXT;

	338 }

	339

	340 /**

	341 * @return true if href is "javascript:void(0)".

	342 */

	343 private static boolean isVoidHref(String href) {

	344 return href.equals("javascript:void(0)");

	345 }

	346

	347 private static int linkTextToNumber(String linkText) {

	348 linkText = linkText.replaceAll("[()\\[\\]{}]", "");

	349 linkText = linkText.trim(); // Remove leading and trailing whitespaces.

	350 // Remove duplicate internal whitespaces.

	351 linkText = linkText.replaceAll("\\s\\{2,\\}", " ");

	352 return StringUtil.toNumber(linkText);

	353 }

	354

	355 /**

	356 * @returns true if number is >= 0 && < MAX_NUM_FOR_PAGE_PARAM.

	357 */

	358 private static boolean isPlainPageNumber(int number) {

	359 return number >= 0 && number < MAX_NUM_FOR_PAGE_PARAM;

	360 }

	361

	362 }

OLD	NEW

« no previous file with comments | « java/org/chromium/distiller/MonotonicPageInfosGroups.java ('k') | java/org/chromium/distiller/ParsedUrl.java » ('j') | no next file with comments »