OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 import "dart:collection"; | |
6 import 'dart:convert'; | |
7 import 'dart:html'; | |
8 | |
/**
 * Returns a fresh [Range] created by the current document.
 *
 * Exists as a workaround for a feature missing from the HTML library.
 */
Range newRange() => document.createRange();
13 | |
// Scratch range shared by every getClientRect() call on a non-element node.
// It is created lazily and reused so measuring text nodes does not build a
// new Range each time.
Range _tempRange;

/**
 * Synchronously measures the bounding client rectangle of [n].
 *
 * Elements are measured directly. Any other node is measured by pointing the
 * shared scratch range at it (start before the node, end after it) and asking
 * the range for its bounding rectangle. This is a hack to avoid asynchronous
 * measurement, which is annoying when just writing a script.
 */
ClientRect getClientRect(Node n) {
  if (n is Element) return n.$dom_getBoundingClientRect();

  if (_tempRange == null) {
    _tempRange = newRange();
  }
  final Range scratch = _tempRange;
  scratch.setStartBefore(n);
  scratch.setEndAfter(n);
  return scratch.getBoundingClientRect();
}
31 | |
32 /** | |
33 * CSS class that is added to elements in the DOM to indicate that they should | |
34 * be removed when extracting blocks of documentation. This is helpful when | |
35 * running this script in a web browser as it is easy to visually see what | |
36 * blocks of information were extracted when using CSS such as DEBUG_CSS | |
37 * which highlights elements that should be removed. | |
38 */ | |
39 const DART_REMOVED = "dart-removed"; | |
40 | |
// Debug-only stylesheet markup: gives every element carrying the
// DART_REMOVED class a translucent red background so extracted blocks are
// easy to spot in the browser.
41 const DEBUG_CSS = """ | |
42 <style type="text/css"> | |
43 .dart-removed { | |
44 background-color: rgba(255, 0, 0, 0.5); | |
45 } | |
46 </style>"""; | |
47 | |
// Pixel tolerance used by findFirstLine(): a node whose bottom edge lies more
// than this many pixels below the tracked bottom edge is treated as starting
// a different line.
48 const MIN_PIXELS_DIFFERENT_LINES = 10; | |
49 | |
// Selector for `pre` blocks that filteredHtml() always extracts as IDL.
50 const IDL_SELECTOR = "pre.eval, pre.idl"; | |
51 | |
// Input describing the page/type being scraped. The code in this part of the
// file only reads it (keys 'dartIdl', 'type' and 'searchResult'); where it is
// populated is not visible here.
Map data;

// TODO(rnystrom): Hack! Copied from domTypes.json. Instead of hard-coding
// these, should use the same mapping that the DOM/HTML code generators use.
//
// NOTE(review): domTypes is declared without a value here; presumably it is
// derived from domTypesRaw elsewhere -- confirm against the rest of the file.
var domTypes;

// Hard-coded list of DOM type names (see the TODO above).
const domTypesRaw = const [
  "AbstractWorker", "ArrayBuffer", "ArrayBufferView", "Attr",
  "AudioBuffer", "AudioBufferSourceNode", "AudioChannelMerger",
  "AudioChannelSplitter", "AudioContext", "AudioDestinationNode",
  "AudioGain", "AudioGainNode", "AudioListener", "AudioNode",
  "AudioPannerNode", "AudioParam", "AudioProcessingEvent",
  "AudioSourceNode", "BarInfo", "BeforeLoadEvent", "BiquadFilterNode",
  "Blob", "CDATASection", "CSSCharsetRule", "CSSFontFaceRule",
  "CSSImportRule", "CSSMediaRule", "CSSPageRule", "CSSPrimitiveValue",
  "CSSRule", "CSSRuleList", "CSSStyleDeclaration", "CSSStyleRule",
  "CSSStyleSheet", "CSSUnknownRule", "CSSValue", "CSSValueList",
  "CanvasGradient", "CanvasPattern", "CanvasPixelArray",
  "CanvasRenderingContext", "CanvasRenderingContext2D",
  "CharacterData", "ClientRect", "ClientRectList", "Clipboard",
  "CloseEvent", "Comment", "CompositionEvent", "Console",
  "ConvolverNode", "Coordinates", "Counter", "Crypto", "CustomEvent",
  "DOMApplicationCache", "DOMException", "DOMFileSystem",
  "DOMFileSystemSync", "DOMFormData", "DOMImplementation",
  "DOMMimeType", "DOMMimeTypeArray", "DOMParser", "DOMPlugin",
  "DOMPluginArray", "DOMSelection", "DOMSettableTokenList",
  "DOMTokenList", "DOMURL", "DOMWindow", "DataTransferItem",
  "DataTransferItemList", "DataView", "Database", "DatabaseSync",
  "DedicatedWorkerContext", "DelayNode", "DeviceMotionEvent",
  "DeviceOrientationEvent", "DirectoryEntry", "DirectoryEntrySync",
  "DirectoryReader", "DirectoryReaderSync", "Document",
  "DocumentFragment", "DocumentType", "DynamicsCompressorNode",
  "Element", "ElementTimeControl", "ElementTraversal", "Entity",
  "EntityReference", "Entry", "EntryArray", "EntryArraySync",
  "EntrySync", "ErrorEvent", "Event", "EventException", "EventSource",
  "EventTarget", "File", "FileEntry", "FileEntrySync", "FileError",
  "FileException", "FileList", "FileReader", "FileReaderSync",
  "FileWriter", "FileWriterSync", "Float32Array", "Float64Array",
  "Geolocation", "Geoposition", "HTMLAllCollection",
  "HTMLAnchorElement", "HTMLAppletElement", "HTMLAreaElement",
  "HTMLAudioElement", "HTMLBRElement", "HTMLBaseElement",
  "HTMLBaseFontElement", "HTMLBodyElement", "HTMLButtonElement",
  "HTMLCanvasElement", "HTMLCollection", "HTMLDListElement",
  "HTMLDataListElement", "HTMLDetailsElement", "HTMLDirectoryElement",
  "HTMLDivElement", "HTMLDocument", "HTMLElement", "HTMLEmbedElement",
  "HTMLFieldSetElement", "HTMLFontElement", "HTMLFormElement",
  "HTMLFrameElement", "HTMLFrameSetElement", "HTMLHRElement",
  "HTMLHeadElement", "HTMLHeadingElement", "HTMLHtmlElement",
  "HTMLIFrameElement", "HTMLImageElement", "HTMLInputElement",
  "HTMLIsIndexElement", "HTMLKeygenElement", "HTMLLIElement",
  "HTMLLabelElement", "HTMLLegendElement", "HTMLLinkElement",
  "HTMLMapElement", "HTMLMarqueeElement", "HTMLMediaElement",
  "HTMLMenuElement", "HTMLMetaElement", "HTMLMeterElement",
  "HTMLModElement", "HTMLOListElement", "HTMLObjectElement",
  "HTMLOptGroupElement", "HTMLOptionElement", "HTMLOptionsCollection",
  "HTMLOutputElement", "HTMLParagraphElement", "HTMLParamElement",
  "HTMLPreElement", "HTMLProgressElement", "HTMLQuoteElement",
  "HTMLScriptElement", "HTMLSelectElement", "HTMLSourceElement",
  "HTMLSpanElement", "HTMLStyleElement", "HTMLTableCaptionElement",
  "HTMLTableCellElement", "HTMLTableColElement", "HTMLTableElement",
  "HTMLTableRowElement", "HTMLTableSectionElement",
  "HTMLTextAreaElement", "HTMLTitleElement", "HTMLTrackElement",
  "HTMLUListElement", "HTMLUnknownElement", "HTMLVideoElement",
  "HashChangeEvent", "HighPass2FilterNode", "History", "IDBAny",
  "IDBCursor", "IDBCursorWithValue", "IDBDatabase",
  "IDBDatabaseError", "IDBDatabaseException", "IDBFactory",
  "IDBIndex", "IDBKey", "IDBKeyRange", "IDBObjectStore", "IDBRequest",
  "IDBTransaction", "IDBVersionChangeEvent",
  "IDBVersionChangeRequest", "ImageData", "InjectedScriptHost",
  "InspectorFrontendHost", "Int16Array", "Int32Array", "Int8Array",
  "JavaScriptAudioNode", "JavaScriptCallFrame", "KeyboardEvent",
  "Location", "LowPass2FilterNode", "MediaElementAudioSourceNode",
  "MediaError", "MediaList", "MediaQueryList",
  "MediaQueryListListener", "MemoryInfo", "MessageChannel",
  "MessageEvent", "MessagePort", "Metadata", "MouseEvent",
  "MutationCallback", "MutationEvent", "MutationRecord",
  "NamedNodeMap", "Navigator", "NavigatorUserMediaError",
  "NavigatorUserMediaSuccessCallback", "Node", "NodeFilter",
  "NodeIterator", "NodeList", "NodeSelector", "Notation",
  "Notification", "NotificationCenter", "OESStandardDerivatives",
  "OESTextureFloat", "OESVertexArrayObject",
  "OfflineAudioCompletionEvent", "OperationNotAllowedException",
  "OverflowEvent", "PageTransitionEvent", "Performance",
  "PerformanceNavigation", "PerformanceTiming", "PopStateEvent",
  "PositionError", "ProcessingInstruction", "ProgressEvent",
  "RGBColor", "Range", "RangeException", "RealtimeAnalyserNode",
  "Rect", "SQLError", "SQLException", "SQLResultSet",
  "SQLResultSetRowList", "SQLTransaction", "SQLTransactionSync",
  "SVGAElement", "SVGAltGlyphDefElement", "SVGAltGlyphElement",
  "SVGAltGlyphItemElement", "SVGAngle", "SVGAnimateColorElement",
  "SVGAnimateElement", "SVGAnimateMotionElement",
  "SVGAnimateTransformElement", "SVGAnimatedAngle",
  "SVGAnimatedBoolean", "SVGAnimatedEnumeration",
  "SVGAnimatedInteger", "SVGAnimatedLength", "SVGAnimatedLengthList",
  "SVGAnimatedNumber", "SVGAnimatedNumberList",
  "SVGAnimatedPreserveAspectRatio", "SVGAnimatedRect",
  "SVGAnimatedString", "SVGAnimatedTransformList",
  "SVGAnimationElement", "SVGCircleElement", "SVGClipPathElement",
  "SVGColor", "SVGComponentTransferFunctionElement",
  "SVGCursorElement", "SVGDefsElement", "SVGDescElement",
  "SVGDocument", "SVGElement", "SVGElementInstance",
  "SVGElementInstanceList", "SVGEllipseElement", "SVGException",
  "SVGExternalResourcesRequired", "SVGFEBlendElement",
  "SVGFEColorMatrixElement", "SVGFEComponentTransferElement",
  "SVGFECompositeElement", "SVGFEConvolveMatrixElement",
  "SVGFEDiffuseLightingElement", "SVGFEDisplacementMapElement",
  "SVGFEDistantLightElement", "SVGFEDropShadowElement",
  "SVGFEFloodElement", "SVGFEFuncAElement", "SVGFEFuncBElement",
  "SVGFEFuncGElement", "SVGFEFuncRElement",
  "SVGFEGaussianBlurElement", "SVGFEImageElement",
  "SVGFEMergeElement", "SVGFEMergeNodeElement",
  "SVGFEMorphologyElement", "SVGFEOffsetElement",
  "SVGFEPointLightElement", "SVGFESpecularLightingElement",
  "SVGFESpotLightElement", "SVGFETileElement",
  "SVGFETurbulenceElement", "SVGFilterElement",
  "SVGFilterPrimitiveStandardAttributes", "SVGFitToViewBox",
  "SVGFontElement", "SVGFontFaceElement", "SVGFontFaceFormatElement",
  "SVGFontFaceNameElement", "SVGFontFaceSrcElement",
  "SVGFontFaceUriElement", "SVGForeignObjectElement", "SVGGElement",
  "SVGGlyphElement", "SVGGlyphRefElement", "SVGGradientElement",
  "SVGHKernElement", "SVGImageElement", "SVGLangSpace", "SVGLength",
  "SVGLengthList", "SVGLineElement", "SVGLinearGradientElement",
  "SVGLocatable", "SVGMPathElement", "SVGMarkerElement",
  "SVGMaskElement", "SVGMatrix", "SVGMetadataElement",
  "SVGMissingGlyphElement", "SVGNumber", "SVGNumberList", "SVGPaint",
  "SVGPathElement", "SVGPathSeg", "SVGPathSegArcAbs",
  "SVGPathSegArcRel", "SVGPathSegClosePath",
  "SVGPathSegCurvetoCubicAbs", "SVGPathSegCurvetoCubicRel",
  "SVGPathSegCurvetoCubicSmoothAbs",
  "SVGPathSegCurvetoCubicSmoothRel", "SVGPathSegCurvetoQuadraticAbs",
  "SVGPathSegCurvetoQuadraticRel",
  "SVGPathSegCurvetoQuadraticSmoothAbs",
  "SVGPathSegCurvetoQuadraticSmoothRel", "SVGPathSegLinetoAbs",
  "SVGPathSegLinetoHorizontalAbs", "SVGPathSegLinetoHorizontalRel",
  "SVGPathSegLinetoRel", "SVGPathSegLinetoVerticalAbs",
  "SVGPathSegLinetoVerticalRel", "SVGPathSegList",
  "SVGPathSegMovetoAbs", "SVGPathSegMovetoRel", "SVGPatternElement",
  "SVGPoint", "SVGPointList", "SVGPolygonElement",
  "SVGPolylineElement", "SVGPreserveAspectRatio",
  "SVGRadialGradientElement", "SVGRect", "SVGRectElement",
  "SVGRenderingIntent", "SVGSVGElement", "SVGScriptElement",
  "SVGSetElement", "SVGStopElement", "SVGStringList", "SVGStylable",
  "SVGStyleElement", "SVGSwitchElement", "SVGSymbolElement",
  "SVGTRefElement", "SVGTSpanElement", "SVGTests",
  "SVGTextContentElement", "SVGTextElement", "SVGTextPathElement",
  "SVGTextPositioningElement", "SVGTitleElement", "SVGTransform",
  "SVGTransformList", "SVGTransformable", "SVGURIReference",
  "SVGUnitTypes", "SVGUseElement", "SVGVKernElement",
  "SVGViewElement", "SVGViewSpec", "SVGZoomAndPan", "SVGZoomEvent",
  "Screen", "ScriptProfile", "ScriptProfileNode", "SharedWorker",
  "SharedWorkercontext", "SpeechInputEvent", "SpeechInputResult",
  "SpeechInputResultList", "Storage", "StorageEvent", "StorageInfo",
  "StyleMedia", "StyleSheet", "StyleSheetList", "Text", "TextEvent",
  "TextMetrics", "TextTrack", "TextTrackCue", "TextTrackCueList",
  "TimeRanges", "Touch", "TouchEvent", "TouchList", "TreeWalker",
  "UIEvent", "Uint16Array", "Uint32Array", "Uint8Array",
  "ValidityState", "VoidCallback", "WaveShaperNode",
  "WebGLActiveInfo", "WebGLBuffer", "WebGLContextAttributes",
  "WebGLContextEvent", "WebGLDebugRendererInfo", "WebGLDebugShaders",
  "WebGLFramebuffer", "WebGLProgram", "WebGLRenderbuffer",
  "WebGLRenderingContext", "WebGLShader", "WebGLTexture",
  "WebGLUniformLocation", "WebGLVertexArrayObjectOES",
  "WebKitAnimation", "WebKitAnimationEvent", "WebKitAnimationList",
  "WebKitBlobBuilder", "WebKitCSSFilterValue",
  "WebKitCSSKeyframeRule", "WebKitCSSKeyframesRule",
  "WebKitCSSMatrix", "WebKitCSSTransformValue", "WebKitFlags",
  "WebKitLoseContext", "WebKitMutationObserver", "WebKitPoint",
  "WebKitTransitionEvent", "WebSocket", "WheelEvent", "Worker",
  "WorkerContext", "WorkerLocation", "WorkerNavigator",
  "XMLHttpRequest", "XMLHttpRequestException",
  "XMLHttpRequestProgressEvent", "XMLHttpRequestUpload",
  "XMLSerializer", "XPathEvaluator", "XPathException",
  "XPathExpression", "XPathNSResolver", "XPathResult",
  "XSLTProcessor", "AudioBufferCallback", "DatabaseCallback",
  "EntriesCallback", "EntryCallback", "ErrorCallback", "FileCallback",
  "FileSystemCallback", "FileWriterCallback", "MetadataCallback",
  "NavigatorUserMediaErrorCallback", "PositionCallback",
  "PositionErrorCallback", "SQLStatementCallback",
  "SQLStatementErrorCallback", "SQLTransactionCallback",
  "SQLTransactionErrorCallback", "SQLTransactionSyncCallback",
  "StorageInfoErrorCallback", "StorageInfoQuotaCallback",
  "StorageInfoUsageCallback", "StringCallback"
];
234 | |
// Result map that onEnd() serializes to JSON and posts back to JavaScript.
Map dbEntry;

/**
 * The 'dartIdl' entry of [data]. scrapeSection() indexes it by member kind
 * to look up the expected members.
 */
Map get dartIdl {
  return data['dartIdl'];
}

/** The 'type' entry of [data]: the name of the type currently being scraped. */
String get currentType {
  return data['type'];
}
239 | |
// Cache for [currentTypeShort]; null until the first read.
String _currentTypeShort;

/**
 * [currentType] with the prefixes "HTML", "SVG", "DOM", "WebKit" and "Webkit"
 * passed through trimPrefix() one after another. The result is computed on
 * first access and cached.
 */
String get currentTypeShort {
  if (_currentTypeShort == null) {
    _currentTypeShort = currentType;
    // Each prefix is applied to the result of the previous trim, in order.
    for (final prefix in const ["HTML", "SVG", "DOM", "WebKit", "Webkit"]) {
      _currentTypeShort = trimPrefix(_currentTypeShort, prefix);
    }
  }
  return _currentTypeShort;
}
252 | |
// Cache for [currentTypeTiny]; null until the first read.
String _currentTypeTiny;

/**
 * [currentTypeShort] passed through trimEnd() with "Element". Computed on
 * first access and cached.
 */
String get currentTypeTiny {
  if (_currentTypeTiny != null) return _currentTypeTiny;

  _currentTypeTiny = currentTypeShort;
  _currentTypeTiny = trimEnd(_currentTypeTiny, "Element");
  return _currentTypeTiny;
}
261 | |
/** The 'searchResult' entry of [data]. */
Map get searchResult {
  return data['searchResult'];
}

/** The 'link' entry of [searchResult]: the URL of the page being scraped. */
String get pageUrl {
  return searchResult['link'];
}
264 | |
// Cache for [pageDomain]; null until the first read.
String _pageDomain;

/**
 * The leading part of [pageUrl] up to, but not including, the first "/" that
 * follows the scheme, e.g. "https://example.com" for
 * "https://example.com/docs/page". Computed on first access and cached.
 *
 * If [pageUrl] has no "/" after the host (for example "https://example.com"),
 * the whole URL is returned. Previously that case passed -1 to substring()
 * and threw.
 */
String get pageDomain {
  if (_pageDomain == null) {
    final String url = pageUrl;
    // Start searching after "https://" so the slashes of the scheme separator
    // are skipped. For "http://" URLs this starts one character into the
    // host, which still finds the first path slash.
    final int pathStart = url.indexOf("/", "https://".length);
    _pageDomain = pathStart == -1 ? url : url.substring(0, pathStart);
  }
  return _pageDomain;
}
272 | |
/** [pageUrl] truncated just after its last "/", i.e. the page's directory. */
String get pageDir => pageUrl.substring(0, pageUrl.lastIndexOf('/') + 1);
276 | |
/**
 * Resolves the href of [anchor] against the page being scraped.
 *
 * Returns '' when [anchor] is null or has an empty href. An href that already
 * starts with "http://" or "https://" is returned unchanged. Otherwise the
 * href is appended to [pageDomain] (leading "/"), to [pageUrl] (leading "#"),
 * or to [pageDir] (anything else).
 */
String getAbsoluteUrl(AnchorElement anchor) {
  if (anchor == null) return '';
  if (anchor.href.length == 0) return '';

  final String href = anchor.href;
  if (new RegExp("^https?://").hasMatch(href)) return href;

  if (href.startsWith('/')) return "$pageDomain$href";
  if (href.startsWith("#")) return "$pageUrl$href";
  return "$pageDir$href";
}
290 | |
/** Whether [n] is a [TableElement] or has one among its ancestors. */
bool inTable(Node n) {
  for (Node cursor = n; cursor != null; cursor = cursor.parent) {
    if (cursor is TableElement) return true;
  }
  return false;
}
298 | |
/**
 * HTML-escapes [str] by assigning it as the text of a throwaway `div` and
 * reading back the element's inner HTML.
 */
String escapeHTML(str) {
  final Element scratch = new Element.tag("div")..text = str;
  return scratch.innerHTML;
}
304 | |
/**
 * Collects every [Text] node under [elem], depth first, in the order the
 * nodes appear in each parent's child list.
 */
List<Text> getAllTextNodes(Element elem) {
  final collected = <Text>[];

  void collect(Node node) {
    if (node is! Text) {
      // Not a text node: descend into its children.
      node.nodes.forEach(collect);
      return;
    }
    collected.add(node);
  }

  collect(elem);
  return collected;
}
320 | |
/**
 * Whether [n] and all of its descendants are node types that may be skipped
 * when they carry no text content.
 *
 * Images, canvases, inputs and objects are never skippable; text nodes always
 * are; any other node is skippable only if every child is.
 */
bool isSkippableType(Node n) {
  // TODO(jacobr): are there any types we don't want to skip even if they
  // have no text content?
  final bool neverSkippable = n is ImageElement ||
      n is CanvasElement ||
      n is InputElement ||
      n is ObjectElement;
  if (neverSkippable) return false;
  if (n is Text) return true;

  return n.nodes.every((child) => isSkippableType(child));
}
341 | |
/**
 * Whether [n] can be dropped: it must be of a skippable type (see
 * isSkippableType) and its text must be empty after trimming.
 */
bool isSkippable(Node n) {
  return isSkippableType(n) && n.text.trim().length == 0;
}
346 | |
/**
 * Serializes [dbEntry] to JSON and hands it to the JavaScript side of the
 * page via window.postMessage.
 */
void onEnd() {
  // Hideous hack to send JSON back to JS.
  final String encoded = JSON.encode(dbEntry);

  // Workaround for a bug in JSON.decode: swap the placeholder token for an
  // escaped newline sequence.
  final String payload =
      encoded.replaceAll("ZDARTIUMDOESNTESCAPESLASHNJXXXX", "\\n");

  // Use postMessage to send the JSON to JavaScript; the message is tagged
  // with a fixed prefix. TODO(jacobr): use a simple isolate based Dart-JS
  // interop solution in the future.
  window.postMessage("START_DART_MESSAGE_UNIQUE_IDENTIFIER$payload", "*");
}
357 | |
/** Immutable bundle of what filteredHtml() extracted for one section. */
class SectionParseResult {
  /** Cleaned-up HTML of the section, as produced by genCleanHtml(). */
  final String html;

  /** Absolute URL taken from the first line's anchor, or null if none. */
  final String url;

  /** Concatenated outer HTML of the `pre` blocks judged to be IDL. */
  final String idl;

  SectionParseResult(String html, String url, String idl)
      : html = html,
        url = url,
        idl = idl;
}
364 | |
/**
 * Tidies the DOM under [root] in place and returns the resulting inner HTML
 * passed through JSONFIXUPHACK().
 *
 * Steps, in order:
 *  * strip the DART_REMOVED marker class and all inline `style` attributes;
 *  * remove `.lang.lang-en` tag elements;
 *  * drop a "Parameters" section whose descriptions are all empty, or which
 *    only says 'None.';
 *  * drop a "Return value" section whose body is a single word;
 *  * repeatedly unwrap single-element wrappers and trim skippable nodes from
 *    both ends until nothing changes.
 */
String genCleanHtml(Element root) {
  root.queryAll(".$DART_REMOVED").forEach((marked) {
    marked.classes.remove(DART_REMOVED);
  });

  // Ditch inline styles.
  root.queryAll('[style]').forEach((styled) {
    styled.attributes.remove('style');
  });

  // These elements are just tags that we should suppress.
  root.queryAll(".lang.lang-en").forEach((tag) {
    tag.remove();
  });

  // Locate the section headings; if a title occurs more than once the last
  // occurrence wins.
  Element paramsHeading;
  Element returnHeading;
  for (final heading in root.queryAll("h6")) {
    if (heading.text == 'Parameters') {
      paramsHeading = heading;
    } else if (heading.text == 'Return value') {
      returnHeading = heading;
    }
  }

  if (paramsHeading != null) {
    final descriptions = root.queryAll("dd");
    int emptyCount = 0;
    for (Element description in descriptions) {
      if (description.text.trim().length == 0) emptyCount++;
    }

    if (emptyCount > 0 && emptyCount == descriptions.length) {
      // Every description is empty, so the parameter list adds zero value.
      paramsHeading.remove();
      root.queryAll("dl").forEach((list) {
        list.remove();
      });
    } else if (descriptions.length == 0) {
      final Element following = paramsHeading.nextElementSibling;
      if (following != null && following.text.trim() == 'None.') {
        // No need to display that the function takes 0 parameters.
        following.remove();
        paramsHeading.remove();
      }
    }
  }

  // Heuristic: if the return value is a single word it is a type name not a
  // useful text description so suppress it.
  if (returnHeading != null) {
    final Element following = returnHeading.nextElementSibling;
    if (following != null && following.text.trim().split(' ').length < 2) {
      following.remove();
      returnHeading.remove();
    }
  }

  bool changed;
  do {
    changed = false;

    // Descend through wrappers that contain exactly one element child.
    while (root.nodes.length == 1 && root.nodes.first is Element) {
      root = root.nodes.first;
      changed = true;
    }

    // Trim useless nodes from the front.
    while (root.nodes.length > 0 && isSkippable(root.nodes.first)) {
      root.nodes.first.remove();
      changed = true;
    }

    // Trim useless nodes from the back.
    while (root.nodes.length > 0 && isSkippable(root.nodes.last)) {
      root.nodes.last.remove();
      changed = true;
    }
  } while (changed);

  return JSONFIXUPHACK(root.innerHTML);
}
448 | |
/**
 * Runs genCleanHtml() on a deep clone of [e], so the element passed in is
 * left untouched.
 */
String genPrettyHtmlFromElement(Element e) => genCleanHtml(e.clone(true));
453 | |
/**
 * Iterator that walks DOM nodes in post-order (children before parents),
 * beginning at the left-most descendant of the start node.
 *
 * The walk is not confined to the start node's subtree: after a node it moves
 * to the left-most descendant of the next sibling, or to the parent when
 * there is no next sibling, and only ends once a node has no parent.
 */
class PostOrderTraversalIterator implements Iterator<Node> {
  Node _current;
  Node _next;

  PostOrderTraversalIterator(Node start) : _next = _leftMostDescendent(start);

  Node get current => _current;

  /** Whether another call to [moveNext] will yield a node. */
  bool get hasNext => _next != null;

  bool moveNext() {
    final Node node = _next;
    _current = node;
    if (node == null) return false;

    final Node sibling = node.nextNode;
    _next = sibling != null ? _leftMostDescendent(sibling) : node.parent;
    return true;
  }

  /** Follows first children from [n] down to a node without children. */
  static Node _leftMostDescendent(Node n) {
    Node deepest = n;
    while (deepest.nodes.length > 0) {
      deepest = deepest.nodes.first;
    }
    return deepest;
  }
}
484 | |
/**
 * Iterable view over a post-order DOM walk that begins at a given node; each
 * [iterator] is a fresh [PostOrderTraversalIterator].
 */
class PostOrderTraversal extends IterableBase<Node> {
  final Node _node;

  PostOrderTraversal(this._node);

  Iterator<Node> get iterator {
    return new PostOrderTraversalIterator(_node);
  }
}
491 | |
/**
 * Estimates which content forms the first rendered line of text inside the
 * [section] range.
 *
 * Nodes are visited in post-order from the section's start container. Each
 * visible node (non-zero width and height) is measured, and the line ends at
 * the first node whose bottom edge is more than MIN_PIXELS_DIFFERENT_LINES
 * pixels below the tracked bottom edge.
 *
 * Returns null when the text of that line does not contain [prop] (after
 * stripWebkit()), since the match is then not trustworthy.
 */
Range findFirstLine(Range section, String prop) {
  final Range line = newRange();
  line.setStart(section.startContainer, section.startOffset);

  // NOTE(review): the original name was `maxBottom`, but the value only ever
  // moves to a smaller bottom coordinate once set.
  num trackedBottom;
  for (final node in new PostOrderTraversal(section.startContainer)) {
    final int position = section.comparePoint(node, 0);
    if (position == -1) continue; // Before the range: skip.
    if (position > 0) break; // After the range: done.

    final ClientRect box = getClientRect(node);
    final num bottom = box.bottom;
    if (box.height > 0 && box.width > 0) {
      if (trackedBottom == null) {
        trackedBottom = bottom;
      } else if (trackedBottom + MIN_PIXELS_DIFFERENT_LINES < bottom) {
        // This node sits on a later line.
        break;
      } else if (trackedBottom > bottom) {
        trackedBottom = bottom;
      }
    }

    line.setEndAfter(node);
  }

  // If the first line of text in the section does not contain the property
  // name then we're not confident we are able to extract a high accuracy match
  // so we should not return anything.
  if (line.toString().contains(stripWebkit(prop))) {
    return line;
  }
  return null;
}
536 | |
/**
 * Returns the first `a` element under [root] whose text contains [prop], or
 * null when there is none.
 */
AnchorElement findAnchorElement(Element root, String prop) {
  final anchors = root.queryAll("a");
  for (int i = 0; i < anchors.length; i++) {
    final AnchorElement candidate = anchors[i];
    if (candidate.text.contains(prop)) return candidate;
  }
  return null;
}
546 | |
/**
 * Finds a tighter root than [root] for extracting content around [elem].
 *
 * Walks up from [elem] and stops at the first ancestor that has a non-empty
 * id not starting with "section_" (the first surrounding element with an ID
 * is safe enough), or at [root] itself, whichever comes first. When [elem] is
 * [root], it is returned unchanged.
 *
 * If [root] is not an ancestor of [elem], the walk stops at the top-most
 * element reached. Previously this case dereferenced the missing parent and
 * threw.
 */
Element findTighterRoot(Element elem, Element root) {
  Element candidate = elem;
  while (root != candidate) {
    final parent = candidate.parent;
    // Ran off the top of the tree (null parent) or reached a non-element
    // parent that has no id: stop instead of dereferencing it.
    if (parent is! Element) break;
    candidate = parent;
    if (candidate.id.length > 0 && candidate.id.indexOf("section_") != 0) {
      break;
    }
  }
  return candidate;
}
558 | |
// TODO(jacobr): this is very slow and ugly.. consider rewriting or at least
// commenting carefully.
/**
 * Extracts the documentation that starts at [elem], bounded by a tightened
 * version of [root], and returns it as a [SectionParseResult].
 *
 * The extraction range starts before [elem] and is grown element by element
 * until the walk reaches the root or hits an element marked DART_REMOVED
 * after some text has already been collected. When [prop] is non-null, the
 * first rendered line (see findFirstLine) is cut off the front and searched
 * for an anchor that provides the result's URL. [fragmentGeneratedCallback],
 * if non-null, is invoked with the cloned fragment before it is cleaned.
 * `script`, `object` and `style` elements are dropped, and IDL-looking `pre`
 * blocks are moved into the result's idl string.
 */
SectionParseResult filteredHtml(Element elem, Element root, String prop,
    Function fragmentGeneratedCallback) {
  // Using a tighter root avoids false positives at the risk of trimming
  // text we shouldn't.
  root = findTighterRoot(elem, root);
  final Range extent = newRange();
  extent.setStartBefore(elem);

  // Pre-order walk over elements starting at elem, extending the range as we
  // go.
  Element cursor = elem;
  while (cursor != null) {
    extent.setEndBefore(cursor);
    if (cursor.classes.contains(DART_REMOVED) &&
        extent.toString().trim().length > 0) {
      // Already-claimed content begins here and we have text: stop.
      break;
    }
    final Element firstChild = cursor.firstElementChild;
    if (firstChild != null) {
      cursor = firstChild;
      continue;
    }
    // Leaf: climb until an element with a next sibling is found, or the
    // root has been covered.
    while (cursor != null) {
      extent.setEndAfter(cursor);
      if (cursor == root) {
        cursor = null;
        break;
      }
      final Element sibling = cursor.nextElementSibling;
      if (sibling != null) {
        cursor = sibling;
        break;
      }
      cursor = cursor.parent;
    }
  }

  String url = null;
  if (prop != null) {
    final Range firstLine = findFirstLine(extent, prop);
    if (firstLine != null) {
      // Skip past the first line; it only names the member.
      extent.setStart(firstLine.endContainer, firstLine.endOffset);
      final DocumentFragment lineCopy = firstLine.cloneContents();
      final AnchorElement link = findAnchorElement(lineCopy, prop);
      if (link != null) {
        url = getAbsoluteUrl(link);
      }
    }
  }

  final fragment = extent.cloneContents();
  if (fragmentGeneratedCallback != null) {
    fragmentGeneratedCallback(fragment);
  }

  // Strip tags we don't want.
  for (Element unwanted in fragment.queryAll("script, object, style")) {
    unwanted.remove();
  }

  // Extract idl.
  final StringBuffer idlHtml = new StringBuffer();
  if (prop != null && prop.length > 0) {
    // Only expect properties to have HTML.
    for (Element block in fragment.queryAll(IDL_SELECTOR)) {
      idlHtml.write(block.outerHTML);
      block.remove();
    }

    // TODO(jacobr) this is a very basic regex to see if text looks like IDL
    final RegExp likelyIdl = new RegExp(" $prop\\w*\\(");
    for (Element pre in fragment.queryAll("pre")) {
      // Check if it looks like idl...
      final String body = pre.text.trim();
      final bool looksLikeIdl = likelyIdl.hasMatch(body) &&
          body.contains("\n") &&
          body.contains(")");
      if (looksLikeIdl) {
        idlHtml.write(pre.outerHTML);
        pre.remove();
      }
    }
  }

  return new SectionParseResult(
      genCleanHtml(fragment), url, idlHtml.toString());
}
636 | |
/**
 * Finds the element under [root] that most plausibly holds the API
 * definition of [prop], or null when nothing qualifies.
 *
 * An element whose id equals [prop] (or "prop()" when [propType] is
 * "methods") is preferred; empty placeholder elements are skipped in favour
 * of their next non-empty sibling. Failing that, [allText] (all text nodes
 * under root, precomputed by the caller for performance) is searched for
 * text that cleans up to exactly [prop], and the parent positioned furthest
 * to the left wins.
 */
Element findBest(Element root, List<Text> allText, String prop,
    String propType) {
  // Best bet: find a child of root where the id matches the property name.
  Element best = root.query("#$prop");
  if (best == null && propType == "methods") {
    best = root.query("[id=$prop\\(\\)]");
  }

  // The bookmark for the element may be just an empty placeholder; move on
  // to the first real element.
  while (best != null && best.text.trim().length == 0) {
    best = best.nextElementSibling;
  }
  if (best != null) return best;

  // If we are at least 70 pixels from the left, something is definitely
  // fishy and we shouldn't even consider this candidate as nobody visually
  // formats API docs like that.
  num bestLeft = 70;

  for (Text text in allText) {
    // TODO(jacobr): does it hurt precision to use the full cleanup?
    if (fullNameCleanup(text.text) != prop) continue;

    final Element owner = text.parent;
    final ClientRect ownerRect = getClientRect(owner);

    // TODO(jacobr): this is a good heuristic
    // if (selObj.selector.indexOf(" > DD ") == -1
    if (ownerRect.left < bestLeft) {
      best = owner;
      bestLeft = ownerRect.left;
    }
  }
  return best;
}
683 | |
/**
 * Heuristically decides whether [e] is tagged as obsolete or deprecated in
 * the way the MDN docs mark such members.
 *
 * True when any `span` under [e] has text starting with "obsolete" or
 * "deprecated", or when the lower-cased text of [e] contains either word
 * delimited by whitespace or the ends of the text.
 */
bool isObsolete(Element e) {
  for (Element span in e.queryAll("span")) {
    final String label = span.text.toLowerCase();
    if (label.startsWith("obsolete") || label.startsWith("deprecated")) {
      return true;
    }
  }

  final String allText = e.text.toLowerCase();
  if (new RegExp(r"(^|\s)obsolete(?=\s|$)").hasMatch(allText)) return true;
  return new RegExp(r"(^|\s)deprecated(?=\s|$)").hasMatch(allText);
}
699 | |
/** Whether [str] begins with an ASCII lower-case letter (a-z). */
bool isFirstCharLowerCase(String str) {
  final RegExp leadingLowerCase = new RegExp("^[a-z]");
  return leadingLowerCase.hasMatch(str);
}
703 | |
704 /** | |
705 * Extracts information from a fragment of HTML only searching under the [root] | |
706 * html node. [sectionSelector] specifies the query to use to find candidate | |
707 * sections of the document to consider (there may be more than one). | |
708 * [currentType] specifies the name of the current class. [members] specifies | |
709 * the known class members for this class that we are attempting to find | |
710 * documentation for. [propType] indicates whether we are searching for | |
711 * methods, properties, constants, or constructors. | |
712 */ | |
713 void scrapeSection(Element root, String sectionSelector, String currentType, | |
714 List members, String propType) { | |
715 Map expectedProps = dartIdl[propType]; | |
716 | |
717 Set<String> alreadyMatchedProperties = new Set<String>(); | |
718 bool onlyConsiderTables = false; | |
719 ElementList allMatches = root.queryAll(sectionSelector); | |
720 if (allMatches.length == 0) { | |
721 // If we can't find any matches to the sectionSelector, we fall back to | |
722 // considering all tables in the document. This is dangerous so we only | |
723 // allow the safer table matching extraction rules for this case. | |
724 allMatches = root.queryAll(".fullwidth-table"); | |
725 onlyConsiderTables = true; | |
726 } | |
727 for (Element matchElement in allMatches) { | |
728 final match = matchElement.parent; | |
729 if (!match.id.startsWith("section") && match.id != "pageText") { | |
730 throw "Unexpected element $match"; | |
731 } | |
732 // We don't want to later display this text a second time while for example | |
733 // displaying class level summary information as then we would display | |
734 // the same documentation twice. | |
735 match.classes.add(DART_REMOVED); | |
736 | |
737 bool foundProps = false; | |
738 | |
739 // TODO(jacobr): we should really look for the table tag instead | |
740 // add an assert if we are missing something that is a table... | |
741 // TODO(jacobr) ignore tables in tables. | |
742 for (Element t in match.queryAll('.standard-table, .fullwidth-table')) { | |
743 int helpIndex = -1; | |
744 num i = 0; | |
745 for (Element r in t.queryAll("th, td.header")) { | |
746 final txt = r.text.trim().split(" ")[0].toLowerCase(); | |
747 if (txt == "description") { | |
748 helpIndex = i; | |
749 break; | |
750 } | |
751 i++; | |
752 } | |
753 | |
754 // Figure out which column in the table contains member names by | |
755 // tracking how many member names each column contains. | |
756 final numMatches = new List<int>(i); | |
757 for (int j = 0; j < i; j++) { | |
758 numMatches[j] = 0; | |
759 } | |
760 | |
761 // Find the column that seems to have the most names that look like | |
762 // expected properties. | |
763 for (Element r in t.queryAll("tbody tr")) { | |
764 ElementList row = r.elements; | |
765 if (row.length == 0 || row.first.classes.contains(".header")) { | |
766 continue; | |
767 } | |
768 | |
769 for (int k = 0; k < numMatches.length && k < row.length; k++) { | |
770 if (expectedProps.containsKey(fullNameCleanup(row[k].text))) { | |
771 numMatches[k]++; | |
772 break; | |
773 } | |
774 } | |
775 } | |
776 | |
777 int propNameIndex = 0; | |
778 { | |
779 int bestCount = numMatches[0]; | |
780 for (int k = 1; k < numMatches.length; k++) { | |
781 if (numMatches[k] > bestCount) { | |
782 bestCount = numMatches[k]; | |
783 propNameIndex = k; | |
784 } | |
785 } | |
786 } | |
787 | |
788 for (Element r in t.queryAll("tbody tr")) { | |
789 final row = r.elements; | |
790 if (row.length > propNameIndex && row.length > helpIndex) { | |
791 if (row.first.classes.contains(".header")) { | |
792 continue; | |
793 } | |
794 // TODO(jacobr): this code for determining the namestr is needlessly | |
795 // messy. | |
796 final nameRow = row[propNameIndex]; | |
797 AnchorElement a = nameRow.query("a"); | |
798 String goodName = ''; | |
799 if (a != null) { | |
800 goodName = a.text.trim(); | |
801 } | |
802 String nameStr = nameRow.text; | |
803 | |
804 Map entry = new Map<String, String>(); | |
805 | |
806 entry["name"] = fullNameCleanup(nameStr.length > 0 ? | |
807 nameStr : goodName); | |
808 | |
809 final parse = filteredHtml(nameRow, nameRow, entry["name"], null); | |
810 String altHelp = parse.html; | |
811 | |
812 entry["help"] = (helpIndex == -1 || row[helpIndex] == null) ? | |
813 altHelp : genPrettyHtmlFromElement(row[helpIndex]); | |
814 if (parse.url != null) { | |
815 entry["url"] = parse.url; | |
816 } | |
817 | |
818 if (parse.idl.length > 0) { | |
819 entry["idl"] = parse.idl; | |
820 } | |
821 | |
822 entry["obsolete"] = isObsolete(r); | |
823 | |
824 if (entry["name"].length > 0) { | |
825 cleanupEntry(members, entry); | |
826 alreadyMatchedProperties.add(entry['name']); | |
827 foundProps = true; | |
828 } | |
829 } | |
830 } | |
831 } | |
832 | |
833 if (onlyConsiderTables) { | |
834 continue; | |
835 } | |
836 | |
837 // After this point we have higher risk tests that attempt to perform | |
838 // rudimentary page segmentation. This approach is much more error-prone | |
839 // than using tables because the HTML is far less clearly structured. | |
840 | |
841 final allText = getAllTextNodes(match); | |
842 | |
843 final pmap = new Map<String, Element>(); | |
844 for (final prop in expectedProps.keys) { | |
845 if (alreadyMatchedProperties.contains(prop)) { | |
846 continue; | |
847 } | |
848 final e = findBest(match, allText, prop, propType); | |
849 if (e != null && !inTable(e)) { | |
850 pmap[prop] = e; | |
851 } | |
852 } | |
853 | |
854 for (final prop in pmap.keys) { | |
855 pmap[prop].classes.add(DART_REMOVED); | |
856 } | |
857 | |
858 // The problem is the MDN docs do not place documentation for each method in a | |
859 // nice self contained subtree. Instead you will see something like: | |
860 | |
861 // <h3>drawImage</h3> | |
862 // <p>Draw image is an awesome method</p> | |
863 // some more info on drawImage here | |
864 // <h3>mozDrawWindow</h3> | |
865 // <p>This API cannot currently be used by Web content. | |
866 // It is chrome only.</p> | |
867 // <h3>drawRect</h3> | |
868 // <p>Always call drawRect instead of drawImage</p> | |
869 // some more info on drawRect here... | |
870 | |
871 // The trouble is we will easily detect that the drawImage and drawRect | |
872 // entries are method definitions because we know to search for these | |
873 // method names but we will not detect that mozDrawWindow is a method | |
874 // definition as that method doesn't exist in our IDL. Thus if we are not | |
875 // careful the definition for the drawImage method will contain the | |
876 // definition for the mozDrawWindow method as well which would result in | |
877 // broken docs. We solve this problem by finding all content with similar | |
878 // visual structure to the already found method definitions. It turns out | |
879 // that using the visual position of each element on the page is much | |
880 // more reliable than using the DOM structure | |
881 // (e.g. section_root > div > h3) for the MDN docs because MDN authors | |
882 // carefully check that the documentation for each method comment is | |
883 // visually consistent but take less care to check that each | |
884 // method comment has identical markup structure. | |
885 for (String prop in pmap.keys) { | |
886 Element e = pmap[prop]; | |
887 ClientRect r = getClientRect(e); | |
888 // TODO(jacobr): a lot of these queries are identical and this code | |
889 // could easily be optimized. | |
890 for (final cand in match.queryAll(e.tagName)) { | |
891 // TODO(jacobr): use a negative selector instead. | |
892 if (!cand.classes.contains(DART_REMOVED) && !inTable(cand)) { | |
893 final candRect = getClientRect(cand); | |
894 // Only consider matches that have similar heights and identical left | |
895 // coordinates. | |
896 if (candRect.left == r.left && | |
897 (candRect.height - r.height).abs() < 5) { | |
898 String propName = fullNameCleanup(cand.text); | |
899 if (isFirstCharLowerCase(propName) && !pmap.containsKey(propName) | |
900 && !alreadyMatchedProperties.contains(propName)) { | |
901 pmap[propName] = cand; | |
902 } | |
903 } | |
904 } | |
905 } | |
906 } | |
907 | |
908 // We mark these elements in batch to reduce the number of layouts | |
909 // triggered. TODO(jacobr): use new batch based async measurement to make | |
910 // this code flow simpler. | |
911 for (String prop in pmap.keys) { | |
912 Element e = pmap[prop]; | |
913 e.classes.add(DART_REMOVED); | |
914 } | |
915 | |
916 // Find likely "subsections" of the main section and mark them with | |
917 // DART_REMOVED so we don't include them in member descriptions... which | |
918 // would suck. | |
919 for (Element e in match.queryAll("[id]")) { | |
920 if (e.id.contains(matchElement.id)) { | |
921 e.classes.add(DART_REMOVED); | |
922 } | |
923 } | |
924 | |
925 for (String prop in pmap.keys) { | |
926 Element elem = pmap[prop]; | |
927 bool obsolete = false; | |
928 final parse = filteredHtml( | |
929 elem, match, prop, | |
930 (Element e) { | |
931 obsolete = isObsolete(e); | |
932 }); | |
933 Map entry = { | |
934 "url" : parse.url, | |
935 "name" : prop, | |
936 "help" : parse.html, | |
937 "obsolete" : obsolete | |
938 }; | |
939 if (parse.idl.length > 0) { | |
940 entry["idl"] = parse.idl; | |
941 } | |
942 cleanupEntry(members, entry); | |
943 } | |
944 } | |
945 } | |
946 | |
/**
 * Placeholder for HTML trimming: currently returns [html] unchanged.
 */
// TODO(jacobr): implement this. Remove spurious enclosing HTML tags, etc.
String trimHtml(String html) => html;
951 | |
/**
 * Returns whether [name] has the shape of a member name: either a lower-case
 * letter followed by one or more ASCII letters or digits, or an upper-case
 * letter followed by any number of upper-case letters and underscores.
 */
bool maybeName(String name) {
  final memberShape = new RegExp(r'^[a-z][a-z0-9A-Z]+$');
  if (memberShape.hasMatch(name)) {
    return true;
  }
  final constantShape = new RegExp(r'^[A-Z][A-Z_]*$');
  return constantShape.hasMatch(name);
}
956 | |
/**
 * Adds the [DART_REMOVED] CSS class to [e].
 *
 * [e] may be null (nothing happens), a single [Element], or an iterable of
 * elements, in which case every element is marked.
 */
// TODO(jacobr): this element is ugly at the moment but will become easier to
// read once ElementList supports most of the Element functionality.
void markRemoved(var e) {
  if (e == null) {
    return;
  }
  if (e is Element) {
    e.classes.add(DART_REMOVED);
    return;
  }
  // Anything that is not a single element is treated as a collection.
  for (Element item in e) {
    item.classes.add(DART_REMOVED);
  }
}
970 | |
/**
 * Replaces every newline in [value] with a fixed placeholder token.
 */
// TODO(jacobr): remove this when the dartium JSON parse handles \n correctly.
String JSONFIXUPHACK(String value) {
  final List<String> lines = value.split("\n");
  return lines.join("ZDARTIUMDOESNTESCAPESLASHNJXXXX");
}
975 | |
/**
 * Replaces a leading "moz" in [name] with "webkit"; names that do not start
 * with "moz" are returned unchanged.
 */
String mozToWebkit(String name) {
  if (!name.startsWith("moz")) {
    return name;
  }
  return "webkit" + name.substring("moz".length);
}
979 | |
/**
 * Removes a leading "webkit" from [name], if present.
 */
String stripWebkit(String name) => trimPrefix(name, "webkit");
983 | |
/**
 * Aggressively reduces [name] to a bare member name.
 *
 * Surrounding whitespace is stripped first, then everything from the first
 * '(' onward is dropped, then the text is cut at the first space, newline,
 * tab or '*'. The result is finally passed through [safeNameCleanup].
 */
// TODO(jacobr): be more principled about this.
String fullNameCleanup(String name) {
  // Trim before splitting: otherwise text with leading whitespace (for
  // example "\n  drawImage") is cut at that whitespace and becomes empty.
  name = name.trim();
  int parenIndex = name.indexOf('(');
  if (parenIndex != -1) {
    name = name.substring(0, parenIndex);
  }
  for (final separator in const [" ", "\n", "\t", "*"]) {
    name = name.split(separator)[0];
  }
  name = name.trim();
  return safeNameCleanup(name);
}
998 | |
/**
 * Less aggressive than [fullNameCleanup] to avoid overeager matching.
 *
 * Drops a parenthesized suffix (only when both '(' and ')' are present),
 * strips a leading "<type>." qualifier for each of [currentType],
 * [currentTypeShort] and [currentTypeTiny] (exact case first, then lower
 * case), and finally maps a "moz" prefix to "webkit".
 */
// TODO(jacobr): be more principled about this.
String safeNameCleanup(String name) {
  final int openParen = name.indexOf('(');
  final bool hasCloseParen = name.indexOf(")") != -1;
  if (openParen != -1 && hasCloseParen) {
    // TODO(jacobr): workaround bug in:
    // name = name.split("(")[0];
    name = name.substring(0, openParen);
  }
  name = name.trim();
  // Order matters: longest type name first, each in both spellings.
  for (final String typeName in
      [currentType, currentTypeShort, currentTypeTiny]) {
    name = trimPrefix(name, typeName + ".");
    name = trimPrefix(name, typeName.toLowerCase() + ".");
  }
  return mozToWebkit(name.trim());
}
1019 | |
/**
 * Detaches every h1, h2 and h3 element found inside [fragment].
 */
void removeHeaders(DocumentFragment fragment) {
  fragment.queryAll("h1, h2, h3").forEach((Element header) {
    header.remove();
  });
}
1028 | |
/**
 * Given an [entry] representing a single method or property cleanup the
 * values performing some simple normalization and only adding the entry to
 * [members] if it has a valid name.
 *
 * The 'help' value (if any) is passed through [trimHtml], the 'name' value is
 * normalized with [fullNameCleanup], null-valued keys are dropped and string
 * values are passed through [JSONFIXUPHACK]. The null/string cleanup only
 * runs when the normalized name satisfies [maybeName].
 */
void cleanupEntry(List members, Map entry) {
  if (entry.containsKey('help')) {
    entry['help'] = trimHtml(entry['help']);
  }
  String name = fullNameCleanup(entry['name']);
  entry['name'] = name;
  if (!maybeName(name)) {
    return;
  }
  // Iterate over a snapshot of the keys: removing an entry while walking the
  // live key view is a concurrent modification of the map.
  for (String key in entry.keys.toList()) {
    var value = entry[key];
    if (value == null) {
      entry.remove(key);
    } else if (value is String) {
      entry[key] = JSONFIXUPHACK(value);
    }
  }
  members.add(entry);
}
1054 | |
/**
 * Returns [str] without its leading [prefix]; if [str] does not start with
 * [prefix] it is returned unchanged. A string equal to [prefix] yields "".
 */
// TODO(jacobr) dup with trim start....
String trimPrefix(String str, String prefix) {
  return str.startsWith(prefix) ? str.substring(prefix.length) : str;
}
1063 | |
/**
 * Removes a leading [start] from [str], but only when [str] is strictly
 * longer than [start]; otherwise [str] is returned unchanged.
 */
String trimStart(String str, String start) {
  final bool hasRoomLeft = str.length > start.length;
  if (hasRoomLeft && str.startsWith(start)) {
    return str.substring(start.length);
  }
  return str;
}
1070 | |
/**
 * Removes a trailing [end] from [str], but only when [str] is strictly
 * longer than [end]; otherwise [str] is returned unchanged.
 */
String trimEnd(String str, String end) {
  final bool hasRoomLeft = str.length > end.length;
  if (hasRoomLeft && str.endsWith(end)) {
    return str.substring(0, str.length - end.length);
  }
  return str;
}
1077 | |
/**
 * Extract a section with name [key] using [selector] to find start points for
 * the section in the document.
 *
 * For every element matching [selector], the parent of that element is
 * treated as the section: h1, h2 and IDL blocks inside it are removed, its
 * filtered HTML is appended to `dbEntry[key]` when non-empty, and the section
 * is marked with [DART_REMOVED].
 */
void extractSection(String selector, String key) {
  for (Element start in document.queryAll(selector)) {
    final Element section = start.parent;
    section.queryAll("h1, h2, $IDL_SELECTOR").forEach((Element skip) {
      skip.remove();
    });
    final String html =
        filteredHtml(section, section, null, removeHeaders).html;
    if (html.length > 0) {
      // Several matches may contribute to the same key; concatenate them.
      dbEntry[key] = dbEntry.containsKey(key) ? dbEntry[key] + html : html;
    }
    section.classes.add(DART_REMOVED);
  }
}
1099 | |
/**
 * Scrapes the currently loaded page into [dbEntry] and then calls [onEnd].
 *
 * Steps, in order: records the page title, rewrites link targets, bails out
 * (setting 'skipped' and 'cause') when the title does not appear to describe
 * [currentType], bails out (setting 'cause') when no `.pageText` element
 * exists, marks boilerplate with [DART_REMOVED], scrapes the methods,
 * constants and properties sections into `dbEntry['members']`, extracts the
 * named sections and examples, and finally stores whatever is left as the
 * class summary.
 */
void run() {
  // Inject CSS to ensure lines don't wrap unless they were intended to.
  // This is needed to make the logic to determine what is a single line
  // behave consistently even for very long method names.
  document.head.nodes.add(new Element.html("""
<style type="text/css">
  body {
    width: 10000px;
  }
</style>"""));

  String title = trimEnd(window.document.title.trim(), " - MDN");
  dbEntry['title'] = title;

  // TODO(rnystrom): Clean up the page a bunch. Not sure if this is the best
  // place to do this...
  // TODO(jacobr): move this to right before we extract HTML.

  // Remove the "Introduced in HTML <version>" boxes.
  for (Element e in document.queryAll('.htmlVersionHeaderTemplate')) {
    e.remove();
  }

  // Flatten the list of known DOM types into a faster and case-insensitive
  // map.
  domTypes = {};
  for (final domType in domTypesRaw) {
    domTypes[domType.toLowerCase()] = domType;
  }

  // Fix up links.
  final SHORT_LINK = new RegExp(r'^[\w/]+$');
  final INNER_LINK = new RegExp(r'[Ee]n/(?:[\w/]+/|)([\w#.]+)(?:\(\))?$');
  final MEMBER_LINK = new RegExp(r'(\w+)[.#](\w+)');
  // The dots are escaped so that only literal "../" path segments are
  // skipped; an unescaped ".." would match any two characters.
  final RELATIVE_LINK = new RegExp(r'^(?:\.\./)*/?[Ee][Nn]/(.+)');

  // - Make relative links absolute.
  // - If we can, take links that point to other MDN pages and retarget them
  //   to appropriate pages in our docs.
  // TODO(rnystrom): Add rel external to links we didn't fix.
  for (AnchorElement a in document.queryAll('a')) {
    // Get the raw attribute because we *don't* want the browser to fully-
    // qualify the name for us since it has the wrong base address for the
    // page.
    var href = a.attributes['href'];

    // Ignore busted links.
    if (href == null) continue;

    // If we can recognize what it's pointing to, point it to our page instead.
    tryToLinkToRealType(maybeType) {
      // See if we know a type with that name.
      final realType = domTypes[maybeType.toLowerCase()];
      if (realType != null) {
        href = '../html/$realType.html';
      }
    }

    // If it's a relative link (that we know how to root), make it absolute.
    var match = RELATIVE_LINK.firstMatch(href);
    if (match != null) {
      href = 'https://developer.mozilla.org/en/${match[1]}';
    }

    // If it's a word link like "foo" find a type or make it absolute.
    match = SHORT_LINK.firstMatch(href);
    if (match != null) {
      href = 'https://developer.mozilla.org/en/DOM/${match[0]}';
    }

    // TODO(rnystrom): This is a terrible way to do this. Should use the real
    // mapping from DOM names to html class names that we use elsewhere in the
    // DOM scripts.
    match = INNER_LINK.firstMatch(href);
    if (match != null) {
      // See if we're linking to a member ("type.name" or "type#name") or just
      // a type ("type").
      final member = MEMBER_LINK.firstMatch(match[1]);
      if (member != null) {
        tryToLinkToRealType(member[1]);
      } else {
        tryToLinkToRealType(match[1]);
      }
    }

    // Put it back into the element.
    a.attributes['href'] = href;
  }

  if (!title.toLowerCase().contains(currentTypeTiny.toLowerCase())) {
    bool foundMatch = false;
    // Test out if the title is really an HTML tag that matches the
    // current class name.
    for (String tag in [title.split(" ")[0], title.split(".").last]) {
      try {
        Element element = new Element.tag(tag);
        // TODO(jacobr): this is a really ugly way of doing this that will
        // stop working at some point soon.
        if (element.typeName == currentType) {
          foundMatch = true;
          break;
        }
      } catch (e) {
        // Deliberately ignored: a candidate that cannot be turned into an
        // element simply does not count as a match.
      }
    }
    if (!foundMatch) {
      dbEntry['skipped'] = true;
      dbEntry['cause'] = "Suspect title";
      onEnd();
      return;
    }
  }

  Element root = document.query(".pageText");
  if (root == null) {
    dbEntry['cause'] = '.pageText not found';
    onEnd();
    return;
  }

  markRemoved(root.query("#Notes"));
  List members = dbEntry['members'];

  // This is a laundry list of CSS selectors for boilerplate content on the
  // MDN pages that we should ignore for the purposes of extracting
  // documentation.
  markRemoved(document.queryAll(".pageToc, footer, header, #nav-toolbar"));
  markRemoved(document.queryAll("#article-nav"));
  markRemoved(document.queryAll(".hideforedit"));
  markRemoved(document.queryAll(".navbox"));
  markRemoved(document.query("#Method_overview"));
  markRemoved(document.queryAll("h1, h2"));

  scrapeSection(root, "#Methods", currentType, members, 'methods');
  scrapeSection(root, "#Constants, #Error_codes, #State_constants",
      currentType, members, 'constants');
  // TODO(jacobr): infer tables based on multiple matches rather than
  // using a hard coded list of section ids.
  scrapeSection(root,
      "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, " +
      "#DOM_properties, #Event_handlers, #Event_Handlers",
      currentType, members, 'properties');

  // Avoid doing this till now to avoid messing up the section scrape.
  markRemoved(document.queryAll("h3"));

  // Collected before the extractSection calls below mutate the document.
  ElementList examples = root.queryAll("span[id^=example], span[id^=Example]");

  extractSection("#See_also", 'seeAlso');
  extractSection("#Specification, #Specifications", "specification");

  // TODO(jacobr): actually extract the constructor(s)
  extractSection("#Constructor, #Constructors", 'constructor');
  extractSection("#Browser_compatibility, #Compatibility", 'compatibility');

  // Extract examples.
  List<String> exampleHtml = [];
  for (Element e in examples) {
    e.classes.add(DART_REMOVED);
  }
  for (Element e in examples) {
    String html = filteredHtml(e, root, null,
        (DocumentFragment fragment) {
          removeHeaders(fragment);
          if (fragment.text.trim().toLowerCase() == "example") {
            // Degenerate example.
            fragment.nodes.clear();
          }
        }).html;
    if (html.length > 0) {
      exampleHtml.add(html);
    }
  }
  if (exampleHtml.length > 0) {
    dbEntry['examples'] = exampleHtml;
  }

  // Extract the class summary.
  // Basically everything left over after the #Summary or #Description tag is
  // safe to include in the summary.
  StringBuffer summary = new StringBuffer();
  for (Element e in root.queryAll("#Summary, #Description")) {
    summary.write(filteredHtml(root, e, null, removeHeaders).html);
  }

  if (summary.length == 0) {
    // Remove the "Gecko DOM Reference text"
    Element ref = root.query(".lang.lang-en");
    if (ref != null) {
      ref = ref.parent;
      String refText = ref.text.trim();
      if (refText == "Gecko DOM Reference" ||
          refText == "« Gecko DOM Reference") {
        ref.remove();
      }
    }
    // Risky... this might add stuff we shouldn't.
    summary.write(filteredHtml(root, root, null, removeHeaders).html);
  }

  if (summary.length > 0) {
    dbEntry['summary'] = summary.toString();
  }

  // Inject CSS to aid debugging in the browser.
  // We could avoid doing this if we know we are not running in a browser..
  document.head.nodes.add(new Element.html(DEBUG_CSS));

  onEnd();
}
1309 | |
/**
 * Entry point: registers [documentLoaded] as a listener for the window's
 * load event.
 */
void main() => window.on.load.add(documentLoaded);
1313 | |
/**
 * Load handler: requests "<current location>.json", decodes the response
 * into [data], resets [dbEntry] and then calls [run].
 */
void documentLoaded(event) {
  // Invoked with the completed request once the JSON has been fetched.
  void onResponse(req) {
    data = JSON.decode(req.responseText);
    dbEntry = {'members': [], 'srcUrl': pageUrl};
    run();
  }

  // Load the database of expected methods and properties with an HttpRequest.
  final String jsonUrl = '${window.location}.json';
  new HttpRequest.get(jsonUrl, onResponse);
}
OLD | NEW |