Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(889)

Side by Side Diff: utils/apidoc/mdn/extract.dart

Issue 1361163002: remove docgen remnants from repo, update CHANGELOG (Closed) Base URL: https://github.com/dart-lang/sdk.git@master
Patch Set: remove unused code Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « utils/apidoc/mdn/data/domTypes.json ('k') | utils/apidoc/mdn/extract.sh » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 import "dart:collection";
6 import 'dart:convert';
7 import 'dart:html';
8
// Stand-in for a Range constructor, which the HTML library does not offer:
// asks the current document to create the range instead.
Range newRange() => document.createRange();
13
// Scratch range shared by every getClientRect call on a non-element node, so
// that measuring text nodes does not allocate a new Range each time.
Range _tempRange;

// Measures synchronously: async measurement is too awkward for a one-off
// script like this one.
ClientRect getClientRect(Node n) {
  // Elements can report their own bounding box.
  if (n is Element) return n.$dom_getBoundingClientRect();

  // Any other node (e.g. text) has no box of its own, so wrap it in the
  // shared range and measure the range instead.
  if (_tempRange == null) _tempRange = newRange();
  _tempRange.setStartBefore(n);
  _tempRange.setEndAfter(n);
  return _tempRange.getBoundingClientRect();
}
31
/**
 * CSS class that is added to elements in the DOM to indicate that they should
 * be removed when extracting blocks of documentation. This is helpful when
 * running this script in a web browser, as it makes it easy to see which
 * blocks of information were extracted when using CSS such as [DEBUG_CSS],
 * which highlights the elements that should be removed.
 */
const DART_REMOVED = "dart-removed";

/**
 * Style block that gives every element carrying the [DART_REMOVED] class a
 * translucent red background.
 */
const DEBUG_CSS = """
<style type="text/css">
.dart-removed {
background-color: rgba(255, 0, 0, 0.5);
}
</style>""";

/**
 * Vertical tolerance, in pixels, used when deciding whether a node still
 * belongs to the first rendered line: a node whose bottom edge lies more than
 * this far below the reference bottom is treated as starting a later line.
 */
const MIN_PIXELS_DIFFERENT_LINES = 10;

/** CSS selector for the `pre` elements that are always extracted as IDL. */
const IDL_SELECTOR = "pre.eval, pre.idl";

/**
 * Input describing the type being scraped. The code in this file reads the
 * keys 'dartIdl', 'type' and 'searchResult' from it.
 */
Map data;
53
// TODO(rnystrom): Hack! This list is copied from domTypes.json. Instead of
// hard-coding the names here, we should use the same mapping that the DOM/HTML
// code generators use.
var domTypes;
const domTypesRaw = const [
  "AbstractWorker", "ArrayBuffer", "ArrayBufferView", "Attr",
  "AudioBuffer", "AudioBufferSourceNode", "AudioChannelMerger",
  "AudioChannelSplitter", "AudioContext", "AudioDestinationNode",
  "AudioGain", "AudioGainNode", "AudioListener", "AudioNode",
  "AudioPannerNode", "AudioParam", "AudioProcessingEvent",
  "AudioSourceNode", "BarInfo", "BeforeLoadEvent", "BiquadFilterNode",
  "Blob", "CDATASection", "CSSCharsetRule", "CSSFontFaceRule",
  "CSSImportRule", "CSSMediaRule", "CSSPageRule", "CSSPrimitiveValue",
  "CSSRule", "CSSRuleList", "CSSStyleDeclaration", "CSSStyleRule",
  "CSSStyleSheet", "CSSUnknownRule", "CSSValue", "CSSValueList",
  "CanvasGradient", "CanvasPattern", "CanvasPixelArray",
  "CanvasRenderingContext", "CanvasRenderingContext2D",
  "CharacterData", "ClientRect", "ClientRectList", "Clipboard",
  "CloseEvent", "Comment", "CompositionEvent", "Console",
  "ConvolverNode", "Coordinates", "Counter", "Crypto", "CustomEvent",
  "DOMApplicationCache", "DOMException", "DOMFileSystem",
  "DOMFileSystemSync", "DOMFormData", "DOMImplementation",
  "DOMMimeType", "DOMMimeTypeArray", "DOMParser", "DOMPlugin",
  "DOMPluginArray", "DOMSelection", "DOMSettableTokenList",
  "DOMTokenList", "DOMURL", "DOMWindow", "DataTransferItem",
  "DataTransferItemList", "DataView", "Database", "DatabaseSync",
  "DedicatedWorkerContext", "DelayNode", "DeviceMotionEvent",
  "DeviceOrientationEvent", "DirectoryEntry", "DirectoryEntrySync",
  "DirectoryReader", "DirectoryReaderSync", "Document",
  "DocumentFragment", "DocumentType", "DynamicsCompressorNode",
  "Element", "ElementTimeControl", "ElementTraversal", "Entity",
  "EntityReference", "Entry", "EntryArray", "EntryArraySync",
  "EntrySync", "ErrorEvent", "Event", "EventException", "EventSource",
  "EventTarget", "File", "FileEntry", "FileEntrySync", "FileError",
  "FileException", "FileList", "FileReader", "FileReaderSync",
  "FileWriter", "FileWriterSync", "Float32Array", "Float64Array",
  "Geolocation", "Geoposition", "HTMLAllCollection",
  "HTMLAnchorElement", "HTMLAppletElement", "HTMLAreaElement",
  "HTMLAudioElement", "HTMLBRElement", "HTMLBaseElement",
  "HTMLBaseFontElement", "HTMLBodyElement", "HTMLButtonElement",
  "HTMLCanvasElement", "HTMLCollection", "HTMLDListElement",
  "HTMLDataListElement", "HTMLDetailsElement", "HTMLDirectoryElement",
  "HTMLDivElement", "HTMLDocument", "HTMLElement", "HTMLEmbedElement",
  "HTMLFieldSetElement", "HTMLFontElement", "HTMLFormElement",
  "HTMLFrameElement", "HTMLFrameSetElement", "HTMLHRElement",
  "HTMLHeadElement", "HTMLHeadingElement", "HTMLHtmlElement",
  "HTMLIFrameElement", "HTMLImageElement", "HTMLInputElement",
  "HTMLIsIndexElement", "HTMLKeygenElement", "HTMLLIElement",
  "HTMLLabelElement", "HTMLLegendElement", "HTMLLinkElement",
  "HTMLMapElement", "HTMLMarqueeElement", "HTMLMediaElement",
  "HTMLMenuElement", "HTMLMetaElement", "HTMLMeterElement",
  "HTMLModElement", "HTMLOListElement", "HTMLObjectElement",
  "HTMLOptGroupElement", "HTMLOptionElement", "HTMLOptionsCollection",
  "HTMLOutputElement", "HTMLParagraphElement", "HTMLParamElement",
  "HTMLPreElement", "HTMLProgressElement", "HTMLQuoteElement",
  "HTMLScriptElement", "HTMLSelectElement", "HTMLSourceElement",
  "HTMLSpanElement", "HTMLStyleElement", "HTMLTableCaptionElement",
  "HTMLTableCellElement", "HTMLTableColElement", "HTMLTableElement",
  "HTMLTableRowElement", "HTMLTableSectionElement",
  "HTMLTextAreaElement", "HTMLTitleElement", "HTMLTrackElement",
  "HTMLUListElement", "HTMLUnknownElement", "HTMLVideoElement",
  "HashChangeEvent", "HighPass2FilterNode", "History", "IDBAny",
  "IDBCursor", "IDBCursorWithValue", "IDBDatabase",
  "IDBDatabaseError", "IDBDatabaseException", "IDBFactory",
  "IDBIndex", "IDBKey", "IDBKeyRange", "IDBObjectStore", "IDBRequest",
  "IDBTransaction", "IDBVersionChangeEvent",
  "IDBVersionChangeRequest", "ImageData", "InjectedScriptHost",
  "InspectorFrontendHost", "Int16Array", "Int32Array", "Int8Array",
  "JavaScriptAudioNode", "JavaScriptCallFrame", "KeyboardEvent",
  "Location", "LowPass2FilterNode", "MediaElementAudioSourceNode",
  "MediaError", "MediaList", "MediaQueryList",
  "MediaQueryListListener", "MemoryInfo", "MessageChannel",
  "MessageEvent", "MessagePort", "Metadata", "MouseEvent",
  "MutationCallback", "MutationEvent", "MutationRecord",
  "NamedNodeMap", "Navigator", "NavigatorUserMediaError",
  "NavigatorUserMediaSuccessCallback", "Node", "NodeFilter",
  "NodeIterator", "NodeList", "NodeSelector", "Notation",
  "Notification", "NotificationCenter", "OESStandardDerivatives",
  "OESTextureFloat", "OESVertexArrayObject",
  "OfflineAudioCompletionEvent", "OperationNotAllowedException",
  "OverflowEvent", "PageTransitionEvent", "Performance",
  "PerformanceNavigation", "PerformanceTiming", "PopStateEvent",
  "PositionError", "ProcessingInstruction", "ProgressEvent",
  "RGBColor", "Range", "RangeException", "RealtimeAnalyserNode",
  "Rect", "SQLError", "SQLException", "SQLResultSet",
  "SQLResultSetRowList", "SQLTransaction", "SQLTransactionSync",
  "SVGAElement", "SVGAltGlyphDefElement", "SVGAltGlyphElement",
  "SVGAltGlyphItemElement", "SVGAngle", "SVGAnimateColorElement",
  "SVGAnimateElement", "SVGAnimateMotionElement",
  "SVGAnimateTransformElement", "SVGAnimatedAngle",
  "SVGAnimatedBoolean", "SVGAnimatedEnumeration",
  "SVGAnimatedInteger", "SVGAnimatedLength", "SVGAnimatedLengthList",
  "SVGAnimatedNumber", "SVGAnimatedNumberList",
  "SVGAnimatedPreserveAspectRatio", "SVGAnimatedRect",
  "SVGAnimatedString", "SVGAnimatedTransformList",
  "SVGAnimationElement", "SVGCircleElement", "SVGClipPathElement",
  "SVGColor", "SVGComponentTransferFunctionElement",
  "SVGCursorElement", "SVGDefsElement", "SVGDescElement",
  "SVGDocument", "SVGElement", "SVGElementInstance",
  "SVGElementInstanceList", "SVGEllipseElement", "SVGException",
  "SVGExternalResourcesRequired", "SVGFEBlendElement",
  "SVGFEColorMatrixElement", "SVGFEComponentTransferElement",
  "SVGFECompositeElement", "SVGFEConvolveMatrixElement",
  "SVGFEDiffuseLightingElement", "SVGFEDisplacementMapElement",
  "SVGFEDistantLightElement", "SVGFEDropShadowElement",
  "SVGFEFloodElement", "SVGFEFuncAElement", "SVGFEFuncBElement",
  "SVGFEFuncGElement", "SVGFEFuncRElement",
  "SVGFEGaussianBlurElement", "SVGFEImageElement",
  "SVGFEMergeElement", "SVGFEMergeNodeElement",
  "SVGFEMorphologyElement", "SVGFEOffsetElement",
  "SVGFEPointLightElement", "SVGFESpecularLightingElement",
  "SVGFESpotLightElement", "SVGFETileElement",
  "SVGFETurbulenceElement", "SVGFilterElement",
  "SVGFilterPrimitiveStandardAttributes", "SVGFitToViewBox",
  "SVGFontElement", "SVGFontFaceElement", "SVGFontFaceFormatElement",
  "SVGFontFaceNameElement", "SVGFontFaceSrcElement",
  "SVGFontFaceUriElement", "SVGForeignObjectElement", "SVGGElement",
  "SVGGlyphElement", "SVGGlyphRefElement", "SVGGradientElement",
  "SVGHKernElement", "SVGImageElement", "SVGLangSpace", "SVGLength",
  "SVGLengthList", "SVGLineElement", "SVGLinearGradientElement",
  "SVGLocatable", "SVGMPathElement", "SVGMarkerElement",
  "SVGMaskElement", "SVGMatrix", "SVGMetadataElement",
  "SVGMissingGlyphElement", "SVGNumber", "SVGNumberList", "SVGPaint",
  "SVGPathElement", "SVGPathSeg", "SVGPathSegArcAbs",
  "SVGPathSegArcRel", "SVGPathSegClosePath",
  "SVGPathSegCurvetoCubicAbs", "SVGPathSegCurvetoCubicRel",
  "SVGPathSegCurvetoCubicSmoothAbs",
  "SVGPathSegCurvetoCubicSmoothRel", "SVGPathSegCurvetoQuadraticAbs",
  "SVGPathSegCurvetoQuadraticRel",
  "SVGPathSegCurvetoQuadraticSmoothAbs",
  "SVGPathSegCurvetoQuadraticSmoothRel", "SVGPathSegLinetoAbs",
  "SVGPathSegLinetoHorizontalAbs", "SVGPathSegLinetoHorizontalRel",
  "SVGPathSegLinetoRel", "SVGPathSegLinetoVerticalAbs",
  "SVGPathSegLinetoVerticalRel", "SVGPathSegList",
  "SVGPathSegMovetoAbs", "SVGPathSegMovetoRel", "SVGPatternElement",
  "SVGPoint", "SVGPointList", "SVGPolygonElement",
  "SVGPolylineElement", "SVGPreserveAspectRatio",
  "SVGRadialGradientElement", "SVGRect", "SVGRectElement",
  "SVGRenderingIntent", "SVGSVGElement", "SVGScriptElement",
  "SVGSetElement", "SVGStopElement", "SVGStringList", "SVGStylable",
  "SVGStyleElement", "SVGSwitchElement", "SVGSymbolElement",
  "SVGTRefElement", "SVGTSpanElement", "SVGTests",
  "SVGTextContentElement", "SVGTextElement", "SVGTextPathElement",
  "SVGTextPositioningElement", "SVGTitleElement", "SVGTransform",
  "SVGTransformList", "SVGTransformable", "SVGURIReference",
  "SVGUnitTypes", "SVGUseElement", "SVGVKernElement",
  "SVGViewElement", "SVGViewSpec", "SVGZoomAndPan", "SVGZoomEvent",
  "Screen", "ScriptProfile", "ScriptProfileNode", "SharedWorker",
  "SharedWorkercontext", "SpeechInputEvent", "SpeechInputResult",
  "SpeechInputResultList", "Storage", "StorageEvent", "StorageInfo",
  "StyleMedia", "StyleSheet", "StyleSheetList", "Text", "TextEvent",
  "TextMetrics", "TextTrack", "TextTrackCue", "TextTrackCueList",
  "TimeRanges", "Touch", "TouchEvent", "TouchList", "TreeWalker",
  "UIEvent", "Uint16Array", "Uint32Array", "Uint8Array",
  "ValidityState", "VoidCallback", "WaveShaperNode",
  "WebGLActiveInfo", "WebGLBuffer", "WebGLContextAttributes",
  "WebGLContextEvent", "WebGLDebugRendererInfo", "WebGLDebugShaders",
  "WebGLFramebuffer", "WebGLProgram", "WebGLRenderbuffer",
  "WebGLRenderingContext", "WebGLShader", "WebGLTexture",
  "WebGLUniformLocation", "WebGLVertexArrayObjectOES",
  "WebKitAnimation", "WebKitAnimationEvent", "WebKitAnimationList",
  "WebKitBlobBuilder", "WebKitCSSFilterValue",
  "WebKitCSSKeyframeRule", "WebKitCSSKeyframesRule",
  "WebKitCSSMatrix", "WebKitCSSTransformValue", "WebKitFlags",
  "WebKitLoseContext", "WebKitMutationObserver", "WebKitPoint",
  "WebKitTransitionEvent", "WebSocket", "WheelEvent", "Worker",
  "WorkerContext", "WorkerLocation", "WorkerNavigator",
  "XMLHttpRequest", "XMLHttpRequestException",
  "XMLHttpRequestProgressEvent", "XMLHttpRequestUpload",
  "XMLSerializer", "XPathEvaluator", "XPathException",
  "XPathExpression", "XPathNSResolver", "XPathResult",
  "XSLTProcessor", "AudioBufferCallback", "DatabaseCallback",
  "EntriesCallback", "EntryCallback", "ErrorCallback", "FileCallback",
  "FileSystemCallback", "FileWriterCallback", "MetadataCallback",
  "NavigatorUserMediaErrorCallback", "PositionCallback",
  "PositionErrorCallback", "SQLStatementCallback",
  "SQLStatementErrorCallback", "SQLTransactionCallback",
  "SQLTransactionErrorCallback", "SQLTransactionSyncCallback",
  "StorageInfoErrorCallback", "StorageInfoQuotaCallback",
  "StorageInfoUsageCallback", "StringCallback"
];
234
// Result record for the current page; onEnd serializes it to JSON and posts
// it to the window.
Map dbEntry;

// The 'dartIdl' entry of [data].
Map get dartIdl {
  return data['dartIdl'];
}

// The 'type' entry of [data]: the name of the type currently being scraped.
String get currentType {
  return data['type'];
}
239
// Cache for [currentTypeShort]; null until first computed.
String _currentTypeShort;

// [currentType] after passing it through trimPrefix for each well-known
// vendor/namespace prefix in turn (presumably stripping the prefix when
// present -- trimPrefix is defined elsewhere). Computed once and cached.
String get currentTypeShort {
  if (_currentTypeShort == null) {
    _currentTypeShort = currentType;
    // Order matters: it mirrors the sequence the prefixes are tried in.
    for (final prefix in const ["HTML", "SVG", "DOM", "WebKit", "Webkit"]) {
      _currentTypeShort = trimPrefix(_currentTypeShort, prefix);
    }
  }
  return _currentTypeShort;
}
252
// Cache for [currentTypeTiny]; null until first computed.
String _currentTypeTiny;

// [currentTypeShort] passed through trimEnd with "Element" (presumably
// dropping a trailing "Element" -- trimEnd is defined elsewhere). Computed
// once and cached.
String get currentTypeTiny {
  if (_currentTypeTiny != null) return _currentTypeTiny;

  _currentTypeTiny = currentTypeShort;
  _currentTypeTiny = trimEnd(_currentTypeTiny, "Element");
  return _currentTypeTiny;
}
261
// The 'searchResult' entry of [data].
Map get searchResult {
  return data['searchResult'];
}

// The 'link' entry of [searchResult]: URL of the page being scraped.
String get pageUrl {
  return searchResult['link'];
}
264
// Cache for [pageDomain]; null until first computed.
String _pageDomain;

/**
 * The leading part of [pageUrl] up to, but not including, the first "/" that
 * follows the scheme, e.g. "https://host" for "https://host/a/b".
 *
 * If [pageUrl] has no such slash (there is no path component), the whole URL
 * is returned. The result is computed once and cached.
 */
String get pageDomain {
  if (_pageDomain == null) {
    final String url = pageUrl;
    // Start the search past the scheme so the slashes of "https://" (or
    // "http://") are not mistaken for the start of the path.
    final int pathStart = url.indexOf("/", "https://".length);
    // indexOf yields -1 when there is no path; substring(0, -1) would throw a
    // RangeError, so fall back to the complete URL in that case.
    _pageDomain = pathStart == -1 ? url : url.substring(0, pathStart);
  }
  return _pageDomain;
}
272
// [pageUrl] truncated just after its last "/", i.e. including that slash.
String get pageDir {
  final int end = pageUrl.lastIndexOf('/') + 1;
  return pageUrl.substring(0, end);
}
276
/**
 * Resolves the href of [anchor] against the current page.
 *
 * Returns '' for a null anchor or an empty href. Hrefs that already start
 * with http:// or https:// are returned unchanged; a leading "/" is resolved
 * against [pageDomain], a leading "#" against [pageUrl], and anything else
 * against [pageDir].
 */
String getAbsoluteUrl(AnchorElement anchor) {
  if (anchor == null) return '';
  final String href = anchor.href;
  if (href.length == 0) return '';

  final bool alreadyAbsolute = new RegExp("^https?://").hasMatch(href);
  if (alreadyAbsolute) return href;

  if (href.startsWith('/')) return "$pageDomain$href";
  if (href.startsWith("#")) return "$pageUrl$href";
  return "$pageDir$href";
}
290
// Whether [n] itself or any of its ancestors is a table element.
bool inTable(Node n) {
  for (Node cursor = n; cursor != null; cursor = cursor.parent) {
    if (cursor is TableElement) return true;
  }
  return false;
}
298
// Escapes [str] for use in HTML by letting the DOM do the work: the value is
// assigned as the text of a detached div and read back as markup.
String escapeHTML(str) {
  final Element scratch = new Element.tag("div")..text = str;
  return scratch.innerHTML;
}
304
// Collects every Text node at or beneath [elem], in document order.
List<Text> getAllTextNodes(Element elem) {
  final found = <Text>[];
  // Explicit stack of nodes still to visit; the top of the stack is the next
  // node in document order.
  final pending = <Node>[elem];
  while (pending.length > 0) {
    final Node node = pending.removeLast();
    if (node is Text) {
      found.add(node);
      continue;
    }
    // Push children right-to-left so the leftmost child is popped first.
    final children = node.nodes;
    for (int i = children.length - 1; i >= 0; i--) {
      pending.add(children[i]);
    }
  }
  return found;
}
320
/**
 * Whether [n] and all of its descendants are of node types that may be
 * dropped when they carry no text. Images, canvases, inputs and objects are
 * never skippable because they are meaningful without any text content.
 */
bool isSkippableType(Node n) {
  // TODO(jacobr): are there any other types we don't want to skip even if
  // they have no text content?
  final bool meaningfulWithoutText = n is ImageElement ||
      n is CanvasElement ||
      n is InputElement ||
      n is ObjectElement;
  if (meaningfulWithoutText) return false;
  if (n is Text) return true;

  // Skippable only if every child subtree is skippable as well.
  return n.nodes.every(isSkippableType);
}
341
// Whether [n] can be dropped: it consists only of skippable node types and
// its text is empty once trimmed.
bool isSkippable(Node n) {
  return isSkippableType(n) && n.text.trim().length == 0;
}
346
// Hands the scraped [dbEntry] back to the JavaScript side of the page.
void onEnd() {
  // Hideous hack to send JSON back to JS: encode, then swap the placeholder
  // token for a backslash followed by 'n' (works around a JSON.decode bug).
  final String dbJson = JSON
      .encode(dbEntry)
      .replaceAll("ZDARTIUMDOESNTESCAPESLASHNJXXXX", "\\n");

  // Use postMessage to send the JSON to JavaScript, tagged with a unique
  // prefix. TODO(jacobr): use a simple isolate based Dart-JS interop solution
  // in the future.
  window.postMessage("START_DART_MESSAGE_UNIQUE_IDENTIFIER$dbJson", "*");
}
357
/** Immutable result of extracting one documentation section. */
class SectionParseResult {
  /** Cleaned-up HTML of the section. */
  final String html;

  /** URL associated with the section, or null when none was found. */
  final String url;

  /** HTML of the snippets that were classified as IDL; may be empty. */
  final String idl;

  // All fields are final, so instances can be compile-time constants;
  // existing `new SectionParseResult(...)` call sites are unaffected.
  const SectionParseResult(this.html, this.url, this.idl);
}
364
/**
 * Tidies the documentation subtree under [root] and returns the resulting
 * inner HTML after passing it through JSONFIXUPHACK.
 *
 * The subtree is modified in place: debug marker classes, inline styles and
 * language tags are stripped, uninformative "Parameters" / "Return value"
 * sections are removed, single-child wrappers are unwrapped, and skippable
 * nodes are trimmed from both ends.
 */
String genCleanHtml(Element root) {
  // The marker class is only needed while scraping; keep it out of the output.
  for (final e in root.queryAll(".$DART_REMOVED")) {
    e.classes.remove(DART_REMOVED);
  }

  // Ditch inline styles.
  for (final e in root.queryAll('[style]')) {
    e.attributes.remove('style');
  }

  // These elements are just tags that we should suppress.
  for (final e in root.queryAll(".lang.lang-en")) {
    e.remove();
  }

  // Locate the h6 headings that introduce the parameter list and the return
  // value. If several match, the last one wins.
  Element parametersHeader;
  Element returnValueHeader;
  for (final e in root.queryAll("h6")) {
    if (e.text == 'Parameters') {
      parametersHeader = e;
    } else if (e.text == 'Return value') {
      returnValueHeader = e;
    }
  }

  if (parametersHeader != null) {
    // Count the parameter descriptions (dd elements) that have no text.
    int numEmptyParameters = 0;
    final parameterDescriptions = root.queryAll("dd");
    for (Element parameterDescription in parameterDescriptions) {
      if (parameterDescription.text.trim().length == 0) {
        numEmptyParameters++;
      }
    }
    if (numEmptyParameters > 0 &&
        numEmptyParameters == parameterDescriptions.length) {
      // Remove the parameter list as it adds zero value as all descriptions
      // are empty. Note that this removes every dl under root.
      parametersHeader.remove();
      for (final e in root.queryAll("dl")) {
        e.remove();
      }
    } else if (parameterDescriptions.length == 0 &&
        parametersHeader.nextElementSibling != null &&
        parametersHeader.nextElementSibling.text.trim() == 'None.') {
      // No need to display that the function takes 0 parameters.
      parametersHeader.nextElementSibling.remove();
      parametersHeader.remove();
    }
  }

  // Heuristic: if the return value is a single word it is a type name not a
  // useful text description so suppress it.
  if (returnValueHeader != null &&
      returnValueHeader.nextElementSibling != null &&
      returnValueHeader.nextElementSibling.text.trim().split(' ').length < 2) {
    returnValueHeader.nextElementSibling.remove();
    returnValueHeader.remove();
  }

  // Repeat until a full pass changes nothing: unwrapping can expose new
  // trimmable nodes and trimming can leave a single wrapper child.
  bool changed = true;
  while (changed) {
    changed = false;
    // Descend through wrappers that contain exactly one element child.
    while (root.nodes.length == 1 && root.nodes.first is Element) {
      root = root.nodes.first;
      changed = true;
    }

    // Trim useless nodes from the front.
    while (root.nodes.length > 0 &&
        isSkippable(root.nodes.first)) {
      root.nodes.first.remove();
      changed = true;
    }

    // Trim useless nodes from the back.
    while (root.nodes.length > 0 &&
        isSkippable(root.nodes.last)) {
      root.nodes.last.remove();
      changed = true;
    }
  }
  return JSONFIXUPHACK(root.innerHTML);
}
448
// Produces cleaned HTML for [e] without touching the live DOM: the cleanup
// runs on a deep clone of the element.
String genPrettyHtmlFromElement(Element e) {
  return genCleanHtml(e.clone(true));
}
453
/**
 * Iterator that visits nodes in post-order (children before their parent),
 * beginning at the left-most descendant of the start node.
 *
 * The walk is not confined to the start node's subtree: after a node it moves
 * on to the next sibling's subtree, or to the parent when there is no next
 * sibling, and only ends when it runs out of parents.
 */
class PostOrderTraversalIterator implements Iterator<Node> {
  Node _next;
  Node _current;

  PostOrderTraversalIterator(Node start)
      : _next = _leftMostDescendent(start);

  Node get current => _current;
  bool get hasNext => _next != null;

  bool moveNext() {
    _current = _next;
    if (_current == null) return false;

    // Children are already done, so continue with the deepest-left node of
    // the next sibling, or climb to the parent once siblings are exhausted.
    final Node sibling = _current.nextNode;
    _next = (sibling != null) ? _leftMostDescendent(sibling) : _current.parent;
    return true;
  }

  // Follows first-child links from [n] down to a node without children.
  static Node _leftMostDescendent(Node n) {
    Node deepest = n;
    while (deepest.nodes.length > 0) {
      deepest = deepest.nodes.first;
    }
    return deepest;
  }
}
484
// Iterable wrapper that starts a fresh PostOrderTraversalIterator at the
// given node each time it is iterated.
class PostOrderTraversal extends IterableBase<Node> {
  final Node _node;

  PostOrderTraversal(this._node);

  Iterator<Node> get iterator {
    return new PostOrderTraversalIterator(_node);
  }
}
491
/**
 * Estimates which content forms the first rendered line of text within the
 * [section] range. Returns null if that line does not contain the string
 * [prop] (after passing it through stripWebkit), since the match is then not
 * trustworthy. We measure the actual rendered client rectangle of each node
 * and use a pixel tolerance ([MIN_PIXELS_DIFFERENT_LINES]) to decide whether
 * text is still on the same line.
 */
Range findFirstLine(Range section, String prop) {
  final firstLine = newRange();
  firstLine.setStart(section.startContainer, section.startOffset);

  // NOTE(review): despite its name, this holds the smallest bottom edge seen
  // so far among visible nodes -- it is only ever replaced by a smaller value.
  num maxBottom = null;
  for (final n in new PostOrderTraversal(section.startContainer)) {
    int compareResult = section.comparePoint(n, 0);
    if (compareResult == -1) {
      // before range so skip.
      continue;
    } else if (compareResult > 0) {
      // After range so exit.
      break;
    }

    final rect = getClientRect(n);
    num bottom = rect.bottom;
    // Nodes with an empty rectangle do not take part in the line test but are
    // still included in the line below.
    if (rect.height > 0 && rect.width > 0) {
      if (maxBottom != null &&
          maxBottom + MIN_PIXELS_DIFFERENT_LINES < bottom) {
        // This node sits clearly lower than the reference: the first line
        // has ended.
        break;
      } else if (maxBottom == null || maxBottom > bottom) {
        maxBottom = bottom;
      }
    }

    firstLine.setEndAfter(n);
  }

  // If the first line of text in the section does not contain the property
  // name then we're not confident we are able to extract a high accuracy match
  // so we should not return anything.
  if (!firstLine.toString().contains(stripWebkit(prop))) {
    return null;
  }
  return firstLine;
}
536
/**
 * Returns the first anchor element under [root] whose text contains [prop],
 * or null when no anchor matches.
 */
AnchorElement findAnchorElement(Element root, String prop) {
  return root.queryAll("a").firstWhere(
      (AnchorElement a) => a.text.contains(prop),
      orElse: () => null);
}
546
// Walks up from [elem] towards [root] and returns the first ancestor that has
// a non-empty id not starting with "section_"; such an element is considered
// a safe enough boundary. Returns [root] if no ancestor qualifies first, and
// [elem] itself when it already is [root].
Element findTighterRoot(Element elem, Element root) {
  Element candidate = elem;
  while (candidate != root) {
    candidate = candidate.parent;
    final bool hasOwnId =
        candidate.id.length > 0 && !candidate.id.startsWith("section_");
    if (hasOwnId) return candidate;
  }
  return candidate;
}
558
// TODO(jacobr): this is very slow and ugly.. consider rewriting or at least
// commenting carefully.
//
// Extracts the documentation that starts at [elem] and runs until the next
// element already marked with DART_REMOVED (once some text has been
// gathered), or until the tightened [root] has been walked completely.
// When [prop] is non-null, the first rendered line is located via
// findFirstLine; if found, it is cut from the result and an anchor in it that
// mentions [prop] supplies the URL. [fragmentGeneratedCallback], if non-null,
// is invoked with the cloned fragment before any cleanup. Returns the cleaned
// HTML, the URL (or null) and the HTML of any snippets classified as IDL.
SectionParseResult filteredHtml(Element elem, Element root, String prop,
    Function fragmentGeneratedCallback) {
  // Using a tighter root avoids false positives at the risk of trimming
  // text we shouldn't.
  root = findTighterRoot(elem, root);
  final range = newRange();
  range.setStartBefore(elem);

  // Walk the elements in document order, growing the range as we go.
  Element current = elem;
  while (current != null) {
    range.setEndBefore(current);
    // Stop at the next already-claimed element, but only once the range
    // contains some text.
    if (current.classes.contains(DART_REMOVED) &&
        range.toString().trim().length > 0) {
      break;
    }
    if (current.firstElementChild != null) {
      current = current.firstElementChild;
    } else {
      // No children: move to the next sibling, climbing towards root until
      // one is found. Reaching root ends the walk.
      while (current != null) {
        range.setEndAfter(current);
        if (current == root) {
          current = null;
          break;
        }
        if (current.nextElementSibling != null) {
          current = current.nextElementSibling;
          break;
        }
        current = current.parent;
      }
    }
  }
  String url = null;
  if (prop != null) {
    Range firstLine = findFirstLine(range, prop);
    if (firstLine != null) {
      // Exclude the first line from the extracted content and look in it for
      // a link mentioning the property.
      range.setStart(firstLine.endContainer, firstLine.endOffset);
      DocumentFragment firstLineClone = firstLine.cloneContents();
      AnchorElement anchor = findAnchorElement(firstLineClone, prop);
      if (anchor != null) {
        url = getAbsoluteUrl(anchor);
      }
    }
  }
  // Everything below works on a copy of the range contents.
  final fragment = range.cloneContents();
  if (fragmentGeneratedCallback != null) {
    fragmentGeneratedCallback(fragment);
  }
  // Strip tags we don't want
  for (Element e in fragment.queryAll("script, object, style")) {
    e.remove();
  }

  // Extract idl
  final idl = new StringBuffer();
  if (prop != null && prop.length > 0) {
    // IDL is only extracted when a non-empty property name was supplied.
    // Elements matching IDL_SELECTOR are always moved from the fragment into
    // the IDL output.
    for(Element e in fragment.queryAll(IDL_SELECTOR)) {
      idl.write(e.outerHTML);
      e.remove();
    }
    // TODO(jacobr) this is a very basic regex to see if text looks like IDL
    RegExp likelyIdl = new RegExp(" $prop\\w*\\(");

    for (Element e in fragment.queryAll("pre")) {
      // Check if it looks like idl: mentions the property followed by "(",
      // spans multiple lines and contains a ")".
      String txt = e.text.trim();
      if (likelyIdl.hasMatch(txt) && txt.contains("\n") && txt.contains(")")) {
        idl.write(e.outerHTML);
        e.remove();
      }
    }
  }
  return new SectionParseResult(genCleanHtml(fragment), url, idl.toString());
}
636
/**
 * Picks the element under [root] that most plausibly holds the API definition
 * of [prop], or returns null when nothing suitable is found. [allText] is the
 * list of all text nodes under root, precomputed by the caller for speed.
 * [propType] names the kind of member; for "methods" an id of the form
 * `prop()` is also tried.
 */
Element findBest(Element root, List<Text> allText, String prop,
    String propType) {
  // Best bet: an element whose id is the property name.
  Element byId = root.query("#$prop");
  if (byId == null && propType == "methods") {
    byId = root.query("[id=$prop\\(\\)]");
  }
  // The bookmark may be an empty placeholder; advance to the first sibling
  // that actually has text.
  while (byId != null && byId.text.trim().length == 0) {
    byId = byId.nextElementSibling;
  }
  if (byId != null) return byId;

  // Fallback: among text nodes whose cleaned-up text equals the property
  // name, take the parent that is rendered furthest to the left. Anything 70
  // or more pixels from the left is never accepted, as nobody visually
  // formats API docs like that.
  Element best = null;
  num bestLeft = 70;
  for (Text text in allText) {
    // TODO(jacobr): does it hurt precision to use the full cleanup?
    if (fullNameCleanup(text.text) != prop) continue;

    final Element owner = text.parent;
    final num left = getClientRect(owner).left;
    // TODO(jacobr): this is a good heuristic
    // if (selObj.selector.indexOf(" > DD ") == -1
    if (left < bestLeft) {
      best = owner;
      bestLeft = left;
    }
  }
  return best;
}
683
/**
 * Whether [e] looks like it is marked obsolete or deprecated, based on
 * heuristics for how such tags appear in the MDN docs: either a span whose
 * text starts with one of the two words, or the word appearing as a
 * whitespace-delimited token in the element's text (case-insensitive).
 */
bool isObsolete(Element e) {
  final bool taggedBySpan = e.queryAll("span").any((Element span) {
    final String label = span.text.toLowerCase();
    return label.startsWith("obsolete") || label.startsWith("deprecated");
  });
  if (taggedBySpan) return true;

  final String text = e.text.toLowerCase();
  return new RegExp(r"(^|\s)obsolete(?=\s|$)").hasMatch(text) ||
      new RegExp(r"(^|\s)deprecated(?=\s|$)").hasMatch(text);
}
699
// Whether [str] begins with an ASCII lower-case letter (a-z).
bool isFirstCharLowerCase(String str) {
  final leadingLowerCase = new RegExp("^[a-z]");
  return leadingLowerCase.hasMatch(str);
}
703
704 /**
705 * Extracts information from a fragment of HTML only searching under the [root]
706 * html node. [sectionSelector] specifies the query to use to find candidate
707 * sections of the document to consider (there may be more than one).
708 * [currentType] specifies the name of the current class. [members] specifies
709 * the known class members for this class that we are attempting to find
710 * documentation for. [propType] indicates whether we are searching for
711 * methods, properties, constants, or constructors.
712 */
713 void scrapeSection(Element root, String sectionSelector, String currentType,
714 List members, String propType) {
715 Map expectedProps = dartIdl[propType];
716
717 Set<String> alreadyMatchedProperties = new Set<String>();
718 bool onlyConsiderTables = false;
719 ElementList allMatches = root.queryAll(sectionSelector);
720 if (allMatches.length == 0) {
721 // If we can't find any matches to the sectionSelector, we fall back to
722 // considering all tables in the document. This is dangerous so we only
723 // allow the safer table matching extraction rules for this case.
724 allMatches = root.queryAll(".fullwidth-table");
725 onlyConsiderTables = true;
726 }
727 for (Element matchElement in allMatches) {
728 final match = matchElement.parent;
729 if (!match.id.startsWith("section") && match.id != "pageText") {
730 throw "Unexpected element $match";
731 }
732 // We don't want to later display this text a second time while for example
733 // displaying class level summary information as then we would display
734 // the same documentation twice.
735 match.classes.add(DART_REMOVED);
736
737 bool foundProps = false;
738
739 // TODO(jacobr): we should really look for the table tag instead
740 // add an assert if we are missing something that is a table...
741 // TODO(jacobr) ignore tables in tables.
742 for (Element t in match.queryAll('.standard-table, .fullwidth-table')) {
743 int helpIndex = -1;
744 num i = 0;
745 for (Element r in t.queryAll("th, td.header")) {
746 final txt = r.text.trim().split(" ")[0].toLowerCase();
747 if (txt == "description") {
748 helpIndex = i;
749 break;
750 }
751 i++;
752 }
753
754 // Figure out which column in the table contains member names by
755 // tracking how many member names each column contains.
756 final numMatches = new List<int>(i);
757 for (int j = 0; j < i; j++) {
758 numMatches[j] = 0;
759 }
760
761 // Find the column that seems to have the most names that look like
762 // expected properties.
763 for (Element r in t.queryAll("tbody tr")) {
764 ElementList row = r.elements;
765 if (row.length == 0 || row.first.classes.contains(".header")) {
766 continue;
767 }
768
769 for (int k = 0; k < numMatches.length && k < row.length; k++) {
770 if (expectedProps.containsKey(fullNameCleanup(row[k].text))) {
771 numMatches[k]++;
772 break;
773 }
774 }
775 }
776
777 int propNameIndex = 0;
778 {
779 int bestCount = numMatches[0];
780 for (int k = 1; k < numMatches.length; k++) {
781 if (numMatches[k] > bestCount) {
782 bestCount = numMatches[k];
783 propNameIndex = k;
784 }
785 }
786 }
787
788 for (Element r in t.queryAll("tbody tr")) {
789 final row = r.elements;
790 if (row.length > propNameIndex && row.length > helpIndex) {
791 if (row.first.classes.contains(".header")) {
792 continue;
793 }
794 // TODO(jacobr): this code for determining the namestr is needlessly
795 // messy.
796 final nameRow = row[propNameIndex];
797 AnchorElement a = nameRow.query("a");
798 String goodName = '';
799 if (a != null) {
800 goodName = a.text.trim();
801 }
802 String nameStr = nameRow.text;
803
804 Map entry = new Map<String, String>();
805
806 entry["name"] = fullNameCleanup(nameStr.length > 0 ?
807 nameStr : goodName);
808
809 final parse = filteredHtml(nameRow, nameRow, entry["name"], null);
810 String altHelp = parse.html;
811
812 entry["help"] = (helpIndex == -1 || row[helpIndex] == null) ?
813 altHelp : genPrettyHtmlFromElement(row[helpIndex]);
814 if (parse.url != null) {
815 entry["url"] = parse.url;
816 }
817
818 if (parse.idl.length > 0) {
819 entry["idl"] = parse.idl;
820 }
821
822 entry["obsolete"] = isObsolete(r);
823
824 if (entry["name"].length > 0) {
825 cleanupEntry(members, entry);
826 alreadyMatchedProperties.add(entry['name']);
827 foundProps = true;
828 }
829 }
830 }
831 }
832
833 if (onlyConsiderTables) {
834 continue;
835 }
836
837 // After this point we have higher risk tests that attempt to perform
838 // rudimentary page segmentation. This approach is much more error-prone
839 // than using tables because the HTML is far less clearly structured.
840
841 final allText = getAllTextNodes(match);
842
843 final pmap = new Map<String, Element>();
844 for (final prop in expectedProps.keys) {
845 if (alreadyMatchedProperties.contains(prop)) {
846 continue;
847 }
848 final e = findBest(match, allText, prop, propType);
849 if (e != null && !inTable(e)) {
850 pmap[prop] = e;
851 }
852 }
853
854 for (final prop in pmap.keys) {
855 pmap[prop].classes.add(DART_REMOVED);
856 }
857
858 // The problem is the MDN docs do place documentation for each method in a
859 // nice self contained subtree. Instead you will see something like:
860
861 // <h3>drawImage</h3>
862 // <p>Draw image is an awesome method</p>
863 // some more info on drawImage here
864 // <h3>mozDrawWindow</h3>
865 // <p>This API cannot currently be used by Web content.
866 // It is chrome only.</p>
867 // <h3>drawRect</h3>
868 // <p>Always call drawRect instead of drawImage</p>
869 // some more info on drawRect here...
870
871 // The trouble is we will easily detect that the drawImage and drawRect
872 // entries are method definitions because we know to search for these
873 // method names but we will not detect that mozDrawWindow is a method
874 // definition as that method doesn't exist in our IDL. Thus if we are not
875 // careful the definition for the drawImage method will contain the
876 // definition for the mozDrawWindow method as well which would result in
877 // broken docs. We solve this problem by finding all content with similar
878 // visual structure to the already found method definitions. It turns out
879 // that using the visual position of each element on the page is much
880 // more reliable than using the DOM structure
881 // (e.g. section_root > div > h3) for the MDN docs because MDN authors
882 // carefully check that the documentation for each method comment is
883 // visually consistent but take less care to check that each
884 // method comment has identical markup structure.
885 for (String prop in pmap.keys) {
886 Element e = pmap[prop];
887 ClientRect r = getClientRect(e);
888 // TODO(jacobr): a lot of these queries are identical and this code
889 // could easily be optimized.
890 for (final cand in match.queryAll(e.tagName)) {
891 // TODO(jacobr): use a negative selector instead.
892 if (!cand.classes.contains(DART_REMOVED) && !inTable(cand)) {
893 final candRect = getClientRect(cand);
894 // Only consider matches that have similar heights and identical left
895 // coordinates.
896 if (candRect.left == r.left &&
897 (candRect.height - r.height).abs() < 5) {
898 String propName = fullNameCleanup(cand.text);
899 if (isFirstCharLowerCase(propName) && !pmap.containsKey(propName)
900 && !alreadyMatchedProperties.contains(propName)) {
901 pmap[propName] = cand;
902 }
903 }
904 }
905 }
906 }
907
908 // We mark these elements in batch to reduce the number of layouts
909 // triggered. TODO(jacobr): use new batch based async measurement to make
910 // this code flow simpler.
911 for (String prop in pmap.keys) {
912 Element e = pmap[prop];
913 e.classes.add(DART_REMOVED);
914 }
915
916 // Find likely "subsections" of the main section and mark them with
917 // DART_REMOVED so we don't include them in member descriptions... which
918 // would suck.
919 for (Element e in match.queryAll("[id]")) {
920 if (e.id.contains(matchElement.id)) {
921 e.classes.add(DART_REMOVED);
922 }
923 }
924
925 for (String prop in pmap.keys) {
926 Element elem = pmap[prop];
927 bool obsolete = false;
928 final parse = filteredHtml(
929 elem, match, prop,
930 (Element e) {
931 obsolete = isObsolete(e);
932 });
933 Map entry = {
934 "url" : parse.url,
935 "name" : prop,
936 "help" : parse.html,
937 "obsolete" : obsolete
938 };
939 if (parse.idl.length > 0) {
940 entry["idl"] = parse.idl;
941 }
942 cleanupEntry(members, entry);
943 }
944 }
945 }
946
/**
 * Placeholder for HTML cleanup: currently hands [html] back untouched.
 */
// TODO(jacobr): implement this. Remove spurious enclosing HTML tags, etc.
String trimHtml(String html) => html;
951
/**
 * Returns whether [name] has the shape of a member name.
 *
 * Two shapes are accepted: a lowercase ASCII letter followed by one or more
 * ASCII letters or digits, or an uppercase ASCII letter followed by any
 * number of uppercase letters and underscores.
 */
bool maybeName(String name) {
  final memberPattern = new RegExp(r'^[a-z][a-z0-9A-Z]+$');
  if (memberPattern.hasMatch(name)) {
    return true;
  }
  final constantPattern = new RegExp(r'^[A-Z][A-Z_]*$');
  return constantPattern.hasMatch(name);
}
956
/**
 * Adds the [DART_REMOVED] CSS class to [e].
 *
 * [e] may be null (nothing happens), a single [Element], or something that
 * can be iterated to yield elements.
 */
// TODO(jacobr): this function is ugly at the moment but will become easier to
// read once ElementList supports most of the Element functionality.
void markRemoved(var e) {
  if (e == null) {
    return;
  }
  // Treat a lone element as a one-item collection so a single loop suffices.
  final targets = (e is Element) ? [e] : e;
  for (Element target in targets) {
    target.classes.add(DART_REMOVED);
  }
}
970
/**
 * Replaces every newline in [value] with a sentinel token.
 */
// TODO(jacobr): remove this when the dartium JSON parse handles \n correctly.
String JSONFIXUPHACK(String value) =>
    value.replaceAll("\n", "ZDARTIUMDOESNTESCAPESLASHNJXXXX");
975
/**
 * Rewrites a leading "moz" in [name] to "webkit"; other names are returned
 * unchanged.
 */
String mozToWebkit(String name) {
  final leadingMoz = new RegExp("^moz");
  return name.replaceFirst(leadingMoz, "webkit");
}
979
/**
 * Drops a leading "webkit" from [name] if present.
 */
String stripWebkit(String name) => trimPrefix(name, "webkit");
983
/**
 * Reduces [name] to its leading token and normalizes it.
 *
 * Everything from the first '(' onwards is dropped, then everything from the
 * first space, newline, tab or '*' onwards. The remainder is passed through
 * [safeNameCleanup].
 */
// TODO(jacobr): be more principled about this.
String fullNameCleanup(String name) {
  // Trim before splitting: with leading whitespace (common in text taken from
  // table cells) `split(...)[0]` would otherwise be the empty string and the
  // real name would be thrown away.
  name = name.trim();
  int parenIndex = name.indexOf('(');
  if (parenIndex != -1) {
    name = name.substring(0, parenIndex);
  }
  // Keep only the text before the first occurrence of each separator.
  for (final separator in const [" ", "\n", "\t", "*"]) {
    name = name.split(separator)[0];
  }
  return safeNameCleanup(name.trim());
}
998
/**
 * Normalizes [name] without cutting it at whitespace.
 *
 * Drops a parenthesized suffix (only when both '(' and ')' occur), strips a
 * leading "<type>." qualifier for each of [currentType], [currentTypeShort]
 * and [currentTypeTiny] (as written and lowercased), and finally maps a
 * leading "moz" to "webkit" via [mozToWebkit].
 */
// Less aggressive than the full name cleanup to avoid overeager matching.
// TODO(jacobr): be more principled about this.
String safeNameCleanup(String name) {
  final int openParen = name.indexOf('(');
  final bool hasCloseParen = name.indexOf(")") != -1;
  if (openParen != -1 && hasCloseParen) {
    // TODO(jacobr): workaround bug in:
    // name = name.split("(")[0];
    name = name.substring(0, openParen);
  }
  name = name.trim();
  // Order matters: full type name first, then the short and tiny variants,
  // each tried as written and then lowercased.
  for (String typeName in [currentType, currentTypeShort, currentTypeTiny]) {
    name = trimPrefix(name, typeName + ".");
    name = trimPrefix(name, typeName.toLowerCase() + ".");
  }
  return mozToWebkit(name.trim());
}
1019
/**
 * Strips every h1, h2 and h3 element out of [fragment].
 */
void removeHeaders(DocumentFragment fragment) {
  final headers = fragment.queryAll("h1, h2, h3");
  for (Element header in headers) {
    header.remove();
  }
}
1028
/**
 * Normalizes [entry], which describes a single method or property, and
 * appends it to [members] only if its cleaned-up name passes [maybeName].
 *
 * The 'help' value (when present) goes through [trimHtml] and the 'name'
 * value through [fullNameCleanup]. For accepted entries, keys whose value is
 * null are removed and string values are passed through [JSONFIXUPHACK].
 * [entry] is modified in place.
 */
void cleanupEntry(List members, Map entry) {
  if (entry.containsKey('help')) {
    entry['help'] = trimHtml(entry['help']);
  }
  String name = fullNameCleanup(entry['name']);
  entry['name'] = name;
  if (!maybeName(name)) {
    return;
  }
  // Iterate over a snapshot of the keys: removing a key from the map while
  // walking its live `keys` view throws a ConcurrentModificationError.
  for (String key in entry.keys.toList()) {
    var value = entry[key];
    if (value == null) {
      entry.remove(key);
    } else if (value is String) {
      entry[key] = JSONFIXUPHACK(value);
    }
  }
  members.add(entry);
}
1054
/**
 * Returns [str] without its leading [prefix], or [str] unchanged when it does
 * not start with [prefix].
 *
 * Unlike [trimStart], this also strips when [str] equals [prefix], yielding
 * the empty string.
 */
// TODO(jacobr) dup with trim start....
String trimPrefix(String str, String prefix) {
  // startsWith only inspects the head of the string, whereas the previous
  // `indexOf(prefix) == 0` scanned the entire string on a miss.
  return str.startsWith(prefix) ? str.substring(prefix.length) : str;
}
1063
/**
 * Returns [str] with a leading [start] removed.
 *
 * [str] is returned unchanged when it does not begin with [start] or when
 * removing [start] would leave nothing.
 */
String trimStart(String str, String start) {
  final bool strippable = str.length > start.length && str.startsWith(start);
  return strippable ? str.substring(start.length) : str;
}
1070
/**
 * Returns [str] with a trailing [end] removed.
 *
 * [str] is returned unchanged when it does not finish with [end] or when
 * removing [end] would leave nothing.
 */
String trimEnd(String str, String end) {
  final bool strippable = str.length > end.length && str.endsWith(end);
  return strippable ? str.substring(0, str.length - end.length) : str;
}
1077
/**
 * Pulls the HTML of each section located by [selector] into [dbEntry] under
 * [key].
 *
 * For every element matching [selector], its parent is treated as the
 * section: h1/h2 headers and IDL nodes are deleted from it, its filtered HTML
 * is appended to any value already stored under [key], and the parent is then
 * tagged with [DART_REMOVED].
 */
void extractSection(String selector, String key) {
  for (Element marker in document.queryAll(selector)) {
    final Element section = marker.parent;
    for (Element unwanted in section.queryAll("h1, h2, $IDL_SELECTOR")) {
      unwanted.remove();
    }
    final String html =
        filteredHtml(section, section, null, removeHeaders).html;
    if (html.length > 0) {
      // Several sections may map to the same key; concatenate their HTML.
      dbEntry[key] = dbEntry.containsKey(key) ? dbEntry[key] + html : html;
    }
    section.classes.add(DART_REMOVED);
  }
}
1099
/**
 * Scrapes the loaded page into [dbEntry] and finishes by calling [onEnd].
 *
 * In order: widens the body so lines do not wrap, records the page title,
 * rewrites links, checks that the title plausibly matches the current type
 * (bailing out otherwise), marks boilerplate as removed, scrapes the member
 * sections, and extracts the auxiliary sections, examples and summary.
 */
void run() {
  // Make the body extremely wide so that lines only break where the author
  // intended; the single-line detection logic relies on this even for very
  // long method names.
  document.head.nodes.add(new Element.html("""
<style type="text/css">
body {
width: 10000px;
}
</style>"""));

  final String title = trimEnd(window.document.title.trim(), " - MDN");
  dbEntry['title'] = title;

  // TODO(rnystrom): Clean up the page a bunch. Not sure if this is the best
  // place to do this...
  // TODO(jacobr): move this to right before we extract HTML.

  // Drop the "Introduced in HTML <version>" boxes.
  for (Element versionBox in document.queryAll('.htmlVersionHeaderTemplate')) {
    versionBox.remove();
  }

  // Index the known DOM types by lowercased name for fast, case-insensitive
  // lookup.
  domTypes = {};
  for (final rawName in domTypesRaw) {
    domTypes[rawName.toLowerCase()] = rawName;
  }

  // Patterns used to fix up links.
  final shortLink = new RegExp(r'^[\w/]+$');
  final innerLink = new RegExp(r'[Ee]n/(?:[\w/]+/|)([\w#.]+)(?:\(\))?$');
  final memberLink = new RegExp(r'(\w+)[.#](\w+)');
  final relativeLink = new RegExp(r'^(?:../)*/?[Ee][Nn]/(.+)');

  // - Make relative links absolute.
  // - Where possible, retarget links to other MDN pages at the matching page
  //   in our own docs.
  // TODO(rnystrom): Add rel external to links we didn't fix.
  for (AnchorElement anchor in document.queryAll('a')) {
    // Read the raw attribute: the browser would otherwise fully qualify the
    // link against the wrong base address for the page.
    var href = anchor.attributes['href'];

    // Ignore busted links.
    if (href == null) continue;

    // A relative link that we know how to root becomes absolute.
    var match = relativeLink.firstMatch(href);
    if (match != null) {
      href = 'https://developer.mozilla.org/en/${match[1]}';
    }

    // A bare word link like "foo" becomes an absolute DOM link.
    match = shortLink.firstMatch(href);
    if (match != null) {
      href = 'https://developer.mozilla.org/en/DOM/${match[0]}';
    }

    // TODO(rnystrom): This is a terrible way to do this. Should use the real
    // mapping from DOM names to html class names that we use elsewhere in the
    // DOM scripts.
    match = innerLink.firstMatch(href);
    if (match != null) {
      // The target is either a member ("type.name" or "type#name") or just a
      // type ("type"); in both cases the type part is what we look up.
      final member = memberLink.firstMatch(match[1]);
      final maybeType = (member != null) ? member[1] : match[1];
      // If we know a type with that name, point at our own page instead.
      final realType = domTypes[maybeType.toLowerCase()];
      if (realType != null) {
        href = '../html/$realType.html';
      }
    }

    // Store the result back on the element.
    anchor.attributes['href'] = href;
  }

  final bool titleMentionsType =
      title.toLowerCase().contains(currentTypeTiny.toLowerCase());
  if (!titleMentionsType) {
    // The title may instead be an HTML tag whose element class matches the
    // current type; try the first word and the last dot-separated piece.
    final candidateTags = [title.split(" ")[0], title.split(".").last];
    bool foundMatch = false;
    for (String tag in candidateTags) {
      try {
        Element probe = new Element.tag(tag);
        // TODO(jacobr): this is a really ugly way of doing this that will
        // stop working at some point soon.
        if (probe.typeName == currentType) {
          foundMatch = true;
          break;
        }
      } catch (e) {}
    }
    if (!foundMatch) {
      dbEntry['skipped'] = true;
      dbEntry['cause'] = "Suspect title";
      onEnd();
      return;
    }
  }

  Element root = document.query(".pageText");
  if (root == null) {
    dbEntry['cause'] = '.pageText not found';
    onEnd();
    return;
  }

  markRemoved(root.query("#Notes"));
  List members = dbEntry['members'];

  // Boilerplate on the MDN pages, identified by CSS selector, that must be
  // ignored when extracting documentation.
  for (String boilerplate in [
    ".pageToc, footer, header, #nav-toolbar",
    "#article-nav",
    ".hideforedit",
    ".navbox"
  ]) {
    markRemoved(document.queryAll(boilerplate));
  }
  markRemoved(document.query("#Method_overview"));
  markRemoved(document.queryAll("h1, h2"));

  scrapeSection(root, "#Methods", currentType, members, 'methods');
  scrapeSection(root, "#Constants, #Error_codes, #State_constants",
      currentType, members, 'constants');
  // TODO(jacobr): infer tables based on multiple matches rather than
  // using a hard coded list of section ids.
  scrapeSection(
      root,
      "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, "
      "#DOM_properties, #Event_handlers, #Event_Handlers",
      currentType,
      members,
      'properties');

  // Deliberately postponed until after the section scrape so it does not
  // interfere with it.
  markRemoved(document.queryAll("h3"));

  ElementList examples = root.queryAll("span[id^=example], span[id^=Example]");

  extractSection("#See_also", 'seeAlso');
  extractSection("#Specification, #Specifications", "specification");

  // TODO(jacobr): actually extract the constructor(s)
  extractSection("#Constructor, #Constructors", 'constructor');
  extractSection("#Browser_compatibility, #Compatibility", 'compatibility');

  // Extract examples: mark all of them first, then pull out their HTML.
  for (Element example in examples) {
    example.classes.add(DART_REMOVED);
  }
  List<String> exampleHtml = [];
  for (Element example in examples) {
    final parsed = filteredHtml(example, root, null,
        (DocumentFragment fragment) {
      removeHeaders(fragment);
      if (fragment.text.trim().toLowerCase() == "example") {
        // Degenerate example.
        fragment.nodes.clear();
      }
    });
    String html = parsed.html;
    if (html.length > 0) {
      exampleHtml.add(html);
    }
  }
  if (exampleHtml.length > 0) {
    dbEntry['examples'] = exampleHtml;
  }

  // Extract the class summary. Essentially everything still left after the
  // #Summary or #Description tag is safe to include.
  final summary = new StringBuffer();
  for (Element summaryStart in root.queryAll("#Summary, #Description")) {
    summary.write(filteredHtml(root, summaryStart, null, removeHeaders).html);
  }

  if (summary.isEmpty) {
    // Remove the "Gecko DOM Reference text"
    Element ref = root.query(".lang.lang-en");
    if (ref != null) {
      ref = ref.parent;
      String refText = ref.text.trim();
      final bool isGeckoBanner = refText == "Gecko DOM Reference" ||
          refText == "« Gecko DOM Reference";
      if (isGeckoBanner) {
        ref.remove();
      }
    }
    // Risky... this might add stuff we shouldn't.
    summary.write(filteredHtml(root, root, null, removeHeaders).html);
  }

  if (summary.isNotEmpty) {
    dbEntry['summary'] = summary.toString();
  }

  // Inject CSS to aid debugging in the browser.
  // We could avoid doing this if we know we are not running in a browser..
  document.head.nodes.add(new Element.html(DEBUG_CSS));

  onEnd();
}
1309
/**
 * Entry point: registers [documentLoaded] as a handler for the window's load
 * event.
 */
void main() {
  final loadListeners = window.on.load;
  loadListeners.add(documentLoaded);
}
1313
/**
 * Load handler: requests the JSON file located at the page's own URL with
 * ".json" appended, stores the decoded result in [data], resets [dbEntry]
 * and then starts [run].
 */
void documentLoaded(event) {
  // Load the database of expected methods and properties with an HttpRequest.
  final String jsonUrl = '${window.location}.json';
  new HttpRequest.get(jsonUrl, (request) {
    data = JSON.decode(request.responseText);
    dbEntry = {'members': [], 'srcUrl': pageUrl};
    run();
  });
}
OLDNEW
« no previous file with comments | « utils/apidoc/mdn/data/domTypes.json ('k') | utils/apidoc/mdn/extract.sh » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698