| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
| 5 import "dart:collection"; | |
| 6 import 'dart:convert'; | |
| 7 import 'dart:html'; | |
| 8 | |
// Works around a feature missing from the HTML library by asking the
// document to create the range.
Range newRange() => document.createRange();
| 13 | |
// Scratch range shared by every call to [getClientRect] so that measuring
// text nodes does not create a new range each time.
Range _tempRange;

// Measures [n] synchronously. This is a hack: ASYNC measurement is annoying
// when just writing a script.
ClientRect getClientRect(Node n) {
  if (n is Element) return n.$dom_getBoundingClientRect();

  // Hack for nodes that are not elements: wrap the node in the shared range
  // and measure the range instead.
  if (_tempRange == null) _tempRange = newRange();
  _tempRange
    ..setStartBefore(n)
    ..setEndAfter(n);
  return _tempRange.getBoundingClientRect();
}
| 31 | |
| 32 /** | |
| 33 * CSS class that is added to elements in the DOM to indicate that they should | |
| 34 * be removed when extracting blocks of documentation. This is helpful when | |
| 35 * running this script in a web browser as it is easy to visually see what | |
| 36 * blocks of information were extracted when using CSS such as DEBUG_CSS | |
| 37 * which highlights elements that should be removed. | |
| 38 */ | |
| 39 const DART_REMOVED = "dart-removed"; | |
| 40 | |
| 41 const DEBUG_CSS = """ | |
| 42 <style type="text/css"> | |
| 43 .dart-removed { | |
| 44 background-color: rgba(255, 0, 0, 0.5); | |
| 45 } | |
| 46 </style>"""; | |
| 47 | |
| 48 const MIN_PIXELS_DIFFERENT_LINES = 10; | |
| 49 | |
| 50 const IDL_SELECTOR = "pre.eval, pre.idl"; | |
| 51 | |
| 52 Map data; | |
| 53 | |
| 54 // TODO(rnystrom): Hack! Copied from domTypes.json. Instead of hard-coding | |
| 55 // these, should use the same mapping that the DOM/HTML code generators use. | |
| 56 var domTypes; | |
| 57 const domTypesRaw = const [ | |
| 58 "AbstractWorker", "ArrayBuffer", "ArrayBufferView", "Attr", | |
| 59 "AudioBuffer", "AudioBufferSourceNode", "AudioChannelMerger", | |
| 60 "AudioChannelSplitter", "AudioContext", "AudioDestinationNode", | |
| 61 "AudioGain", "AudioGainNode", "AudioListener", "AudioNode", | |
| 62 "AudioPannerNode", "AudioParam", "AudioProcessingEvent", | |
| 63 "AudioSourceNode", "BarInfo", "BeforeLoadEvent", "BiquadFilterNode", | |
| 64 "Blob", "CDATASection", "CSSCharsetRule", "CSSFontFaceRule", | |
| 65 "CSSImportRule", "CSSMediaRule", "CSSPageRule", "CSSPrimitiveValue", | |
| 66 "CSSRule", "CSSRuleList", "CSSStyleDeclaration", "CSSStyleRule", | |
| 67 "CSSStyleSheet", "CSSUnknownRule", "CSSValue", "CSSValueList", | |
| 68 "CanvasGradient", "CanvasPattern", "CanvasPixelArray", | |
| 69 "CanvasRenderingContext", "CanvasRenderingContext2D", | |
| 70 "CharacterData", "ClientRect", "ClientRectList", "Clipboard", | |
| 71 "CloseEvent", "Comment", "CompositionEvent", "Console", | |
| 72 "ConvolverNode", "Coordinates", "Counter", "Crypto", "CustomEvent", | |
| 73 "DOMApplicationCache", "DOMException", "DOMFileSystem", | |
| 74 "DOMFileSystemSync", "DOMFormData", "DOMImplementation", | |
| 75 "DOMMimeType", "DOMMimeTypeArray", "DOMParser", "DOMPlugin", | |
| 76 "DOMPluginArray", "DOMSelection", "DOMSettableTokenList", | |
| 77 "DOMTokenList", "DOMURL", "DOMWindow", "DataTransferItem", | |
| 78 "DataTransferItemList", "DataView", "Database", "DatabaseSync", | |
| 79 "DedicatedWorkerContext", "DelayNode", "DeviceMotionEvent", | |
| 80 "DeviceOrientationEvent", "DirectoryEntry", "DirectoryEntrySync", | |
| 81 "DirectoryReader", "DirectoryReaderSync", "Document", | |
| 82 "DocumentFragment", "DocumentType", "DynamicsCompressorNode", | |
| 83 "Element", "ElementTimeControl", "ElementTraversal", "Entity", | |
| 84 "EntityReference", "Entry", "EntryArray", "EntryArraySync", | |
| 85 "EntrySync", "ErrorEvent", "Event", "EventException", "EventSource", | |
| 86 "EventTarget", "File", "FileEntry", "FileEntrySync", "FileError", | |
| 87 "FileException", "FileList", "FileReader", "FileReaderSync", | |
| 88 "FileWriter", "FileWriterSync", "Float32Array", "Float64Array", | |
| 89 "Geolocation", "Geoposition", "HTMLAllCollection", | |
| 90 "HTMLAnchorElement", "HTMLAppletElement", "HTMLAreaElement", | |
| 91 "HTMLAudioElement", "HTMLBRElement", "HTMLBaseElement", | |
| 92 "HTMLBaseFontElement", "HTMLBodyElement", "HTMLButtonElement", | |
| 93 "HTMLCanvasElement", "HTMLCollection", "HTMLDListElement", | |
| 94 "HTMLDataListElement", "HTMLDetailsElement", "HTMLDirectoryElement", | |
| 95 "HTMLDivElement", "HTMLDocument", "HTMLElement", "HTMLEmbedElement", | |
| 96 "HTMLFieldSetElement", "HTMLFontElement", "HTMLFormElement", | |
| 97 "HTMLFrameElement", "HTMLFrameSetElement", "HTMLHRElement", | |
| 98 "HTMLHeadElement", "HTMLHeadingElement", "HTMLHtmlElement", | |
| 99 "HTMLIFrameElement", "HTMLImageElement", "HTMLInputElement", | |
| 100 "HTMLIsIndexElement", "HTMLKeygenElement", "HTMLLIElement", | |
| 101 "HTMLLabelElement", "HTMLLegendElement", "HTMLLinkElement", | |
| 102 "HTMLMapElement", "HTMLMarqueeElement", "HTMLMediaElement", | |
| 103 "HTMLMenuElement", "HTMLMetaElement", "HTMLMeterElement", | |
| 104 "HTMLModElement", "HTMLOListElement", "HTMLObjectElement", | |
| 105 "HTMLOptGroupElement", "HTMLOptionElement", "HTMLOptionsCollection", | |
| 106 "HTMLOutputElement", "HTMLParagraphElement", "HTMLParamElement", | |
| 107 "HTMLPreElement", "HTMLProgressElement", "HTMLQuoteElement", | |
| 108 "HTMLScriptElement", "HTMLSelectElement", "HTMLSourceElement", | |
| 109 "HTMLSpanElement", "HTMLStyleElement", "HTMLTableCaptionElement", | |
| 110 "HTMLTableCellElement", "HTMLTableColElement", "HTMLTableElement", | |
| 111 "HTMLTableRowElement", "HTMLTableSectionElement", | |
| 112 "HTMLTextAreaElement", "HTMLTitleElement", "HTMLTrackElement", | |
| 113 "HTMLUListElement", "HTMLUnknownElement", "HTMLVideoElement", | |
| 114 "HashChangeEvent", "HighPass2FilterNode", "History", "IDBAny", | |
| 115 "IDBCursor", "IDBCursorWithValue", "IDBDatabase", | |
| 116 "IDBDatabaseError", "IDBDatabaseException", "IDBFactory", | |
| 117 "IDBIndex", "IDBKey", "IDBKeyRange", "IDBObjectStore", "IDBRequest", | |
| 118 "IDBTransaction", "IDBVersionChangeEvent", | |
| 119 "IDBVersionChangeRequest", "ImageData", "InjectedScriptHost", | |
| 120 "InspectorFrontendHost", "Int16Array", "Int32Array", "Int8Array", | |
| 121 "JavaScriptAudioNode", "JavaScriptCallFrame", "KeyboardEvent", | |
| 122 "Location", "LowPass2FilterNode", "MediaElementAudioSourceNode", | |
| 123 "MediaError", "MediaList", "MediaQueryList", | |
| 124 "MediaQueryListListener", "MemoryInfo", "MessageChannel", | |
| 125 "MessageEvent", "MessagePort", "Metadata", "MouseEvent", | |
| 126 "MutationCallback", "MutationEvent", "MutationRecord", | |
| 127 "NamedNodeMap", "Navigator", "NavigatorUserMediaError", | |
| 128 "NavigatorUserMediaSuccessCallback", "Node", "NodeFilter", | |
| 129 "NodeIterator", "NodeList", "NodeSelector", "Notation", | |
| 130 "Notification", "NotificationCenter", "OESStandardDerivatives", | |
| 131 "OESTextureFloat", "OESVertexArrayObject", | |
| 132 "OfflineAudioCompletionEvent", "OperationNotAllowedException", | |
| 133 "OverflowEvent", "PageTransitionEvent", "Performance", | |
| 134 "PerformanceNavigation", "PerformanceTiming", "PopStateEvent", | |
| 135 "PositionError", "ProcessingInstruction", "ProgressEvent", | |
| 136 "RGBColor", "Range", "RangeException", "RealtimeAnalyserNode", | |
| 137 "Rect", "SQLError", "SQLException", "SQLResultSet", | |
| 138 "SQLResultSetRowList", "SQLTransaction", "SQLTransactionSync", | |
| 139 "SVGAElement", "SVGAltGlyphDefElement", "SVGAltGlyphElement", | |
| 140 "SVGAltGlyphItemElement", "SVGAngle", "SVGAnimateColorElement", | |
| 141 "SVGAnimateElement", "SVGAnimateMotionElement", | |
| 142 "SVGAnimateTransformElement", "SVGAnimatedAngle", | |
| 143 "SVGAnimatedBoolean", "SVGAnimatedEnumeration", | |
| 144 "SVGAnimatedInteger", "SVGAnimatedLength", "SVGAnimatedLengthList", | |
| 145 "SVGAnimatedNumber", "SVGAnimatedNumberList", | |
| 146 "SVGAnimatedPreserveAspectRatio", "SVGAnimatedRect", | |
| 147 "SVGAnimatedString", "SVGAnimatedTransformList", | |
| 148 "SVGAnimationElement", "SVGCircleElement", "SVGClipPathElement", | |
| 149 "SVGColor", "SVGComponentTransferFunctionElement", | |
| 150 "SVGCursorElement", "SVGDefsElement", "SVGDescElement", | |
| 151 "SVGDocument", "SVGElement", "SVGElementInstance", | |
| 152 "SVGElementInstanceList", "SVGEllipseElement", "SVGException", | |
| 153 "SVGExternalResourcesRequired", "SVGFEBlendElement", | |
| 154 "SVGFEColorMatrixElement", "SVGFEComponentTransferElement", | |
| 155 "SVGFECompositeElement", "SVGFEConvolveMatrixElement", | |
| 156 "SVGFEDiffuseLightingElement", "SVGFEDisplacementMapElement", | |
| 157 "SVGFEDistantLightElement", "SVGFEDropShadowElement", | |
| 158 "SVGFEFloodElement", "SVGFEFuncAElement", "SVGFEFuncBElement", | |
| 159 "SVGFEFuncGElement", "SVGFEFuncRElement", | |
| 160 "SVGFEGaussianBlurElement", "SVGFEImageElement", | |
| 161 "SVGFEMergeElement", "SVGFEMergeNodeElement", | |
| 162 "SVGFEMorphologyElement", "SVGFEOffsetElement", | |
| 163 "SVGFEPointLightElement", "SVGFESpecularLightingElement", | |
| 164 "SVGFESpotLightElement", "SVGFETileElement", | |
| 165 "SVGFETurbulenceElement", "SVGFilterElement", | |
| 166 "SVGFilterPrimitiveStandardAttributes", "SVGFitToViewBox", | |
| 167 "SVGFontElement", "SVGFontFaceElement", "SVGFontFaceFormatElement", | |
| 168 "SVGFontFaceNameElement", "SVGFontFaceSrcElement", | |
| 169 "SVGFontFaceUriElement", "SVGForeignObjectElement", "SVGGElement", | |
| 170 "SVGGlyphElement", "SVGGlyphRefElement", "SVGGradientElement", | |
| 171 "SVGHKernElement", "SVGImageElement", "SVGLangSpace", "SVGLength", | |
| 172 "SVGLengthList", "SVGLineElement", "SVGLinearGradientElement", | |
| 173 "SVGLocatable", "SVGMPathElement", "SVGMarkerElement", | |
| 174 "SVGMaskElement", "SVGMatrix", "SVGMetadataElement", | |
| 175 "SVGMissingGlyphElement", "SVGNumber", "SVGNumberList", "SVGPaint", | |
| 176 "SVGPathElement", "SVGPathSeg", "SVGPathSegArcAbs", | |
| 177 "SVGPathSegArcRel", "SVGPathSegClosePath", | |
| 178 "SVGPathSegCurvetoCubicAbs", "SVGPathSegCurvetoCubicRel", | |
| 179 "SVGPathSegCurvetoCubicSmoothAbs", | |
| 180 "SVGPathSegCurvetoCubicSmoothRel", "SVGPathSegCurvetoQuadraticAbs", | |
| 181 "SVGPathSegCurvetoQuadraticRel", | |
| 182 "SVGPathSegCurvetoQuadraticSmoothAbs", | |
| 183 "SVGPathSegCurvetoQuadraticSmoothRel", "SVGPathSegLinetoAbs", | |
| 184 "SVGPathSegLinetoHorizontalAbs", "SVGPathSegLinetoHorizontalRel", | |
| 185 "SVGPathSegLinetoRel", "SVGPathSegLinetoVerticalAbs", | |
| 186 "SVGPathSegLinetoVerticalRel", "SVGPathSegList", | |
| 187 "SVGPathSegMovetoAbs", "SVGPathSegMovetoRel", "SVGPatternElement", | |
| 188 "SVGPoint", "SVGPointList", "SVGPolygonElement", | |
| 189 "SVGPolylineElement", "SVGPreserveAspectRatio", | |
| 190 "SVGRadialGradientElement", "SVGRect", "SVGRectElement", | |
| 191 "SVGRenderingIntent", "SVGSVGElement", "SVGScriptElement", | |
| 192 "SVGSetElement", "SVGStopElement", "SVGStringList", "SVGStylable", | |
| 193 "SVGStyleElement", "SVGSwitchElement", "SVGSymbolElement", | |
| 194 "SVGTRefElement", "SVGTSpanElement", "SVGTests", | |
| 195 "SVGTextContentElement", "SVGTextElement", "SVGTextPathElement", | |
| 196 "SVGTextPositioningElement", "SVGTitleElement", "SVGTransform", | |
| 197 "SVGTransformList", "SVGTransformable", "SVGURIReference", | |
| 198 "SVGUnitTypes", "SVGUseElement", "SVGVKernElement", | |
| 199 "SVGViewElement", "SVGViewSpec", "SVGZoomAndPan", "SVGZoomEvent", | |
| 200 "Screen", "ScriptProfile", "ScriptProfileNode", "SharedWorker", | |
| 201 "SharedWorkercontext", "SpeechInputEvent", "SpeechInputResult", | |
| 202 "SpeechInputResultList", "Storage", "StorageEvent", "StorageInfo", | |
| 203 "StyleMedia", "StyleSheet", "StyleSheetList", "Text", "TextEvent", | |
| 204 "TextMetrics", "TextTrack", "TextTrackCue", "TextTrackCueList", | |
| 205 "TimeRanges", "Touch", "TouchEvent", "TouchList", "TreeWalker", | |
| 206 "UIEvent", "Uint16Array", "Uint32Array", "Uint8Array", | |
| 207 "ValidityState", "VoidCallback", "WaveShaperNode", | |
| 208 "WebGLActiveInfo", "WebGLBuffer", "WebGLContextAttributes", | |
| 209 "WebGLContextEvent", "WebGLDebugRendererInfo", "WebGLDebugShaders", | |
| 210 "WebGLFramebuffer", "WebGLProgram", "WebGLRenderbuffer", | |
| 211 "WebGLRenderingContext", "WebGLShader", "WebGLTexture", | |
| 212 "WebGLUniformLocation", "WebGLVertexArrayObjectOES", | |
| 213 "WebKitAnimation", "WebKitAnimationEvent", "WebKitAnimationList", | |
| 214 "WebKitBlobBuilder", "WebKitCSSFilterValue", | |
| 215 "WebKitCSSKeyframeRule", "WebKitCSSKeyframesRule", | |
| 216 "WebKitCSSMatrix", "WebKitCSSTransformValue", "WebKitFlags", | |
| 217 "WebKitLoseContext", "WebKitMutationObserver", "WebKitPoint", | |
| 218 "WebKitTransitionEvent", "WebSocket", "WheelEvent", "Worker", | |
| 219 "WorkerContext", "WorkerLocation", "WorkerNavigator", | |
| 220 "XMLHttpRequest", "XMLHttpRequestException", | |
| 221 "XMLHttpRequestProgressEvent", "XMLHttpRequestUpload", | |
| 222 "XMLSerializer", "XPathEvaluator", "XPathException", | |
| 223 "XPathExpression", "XPathNSResolver", "XPathResult", | |
| 224 "XSLTProcessor", "AudioBufferCallback", "DatabaseCallback", | |
| 225 "EntriesCallback", "EntryCallback", "ErrorCallback", "FileCallback", | |
| 226 "FileSystemCallback", "FileWriterCallback", "MetadataCallback", | |
| 227 "NavigatorUserMediaErrorCallback", "PositionCallback", | |
| 228 "PositionErrorCallback", "SQLStatementCallback", | |
| 229 "SQLStatementErrorCallback", "SQLTransactionCallback", | |
| 230 "SQLTransactionErrorCallback", "SQLTransactionSyncCallback", | |
| 231 "StorageInfoErrorCallback", "StorageInfoQuotaCallback", | |
| 232 "StorageInfoUsageCallback", "StringCallback" | |
| 233 ]; | |
| 234 | |
| 235 Map dbEntry; | |
| 236 | |
/** The 'dartIdl' entry of the global [data] map. */
Map get dartIdl {
  return data['dartIdl'];
}

/** The 'type' entry of the global [data] map. */
String get currentType {
  return data['type'];
}
| 239 | |
// Cache for [currentTypeShort]; null until the getter is first evaluated.
String _currentTypeShort;

/**
 * [currentType] after passing it through [trimPrefix] for each of the
 * prefixes "HTML", "SVG", "DOM", "WebKit" and "Webkit", in that order.
 * The result is computed once and then cached.
 */
String get currentTypeShort {
  if (_currentTypeShort == null) {
    _currentTypeShort = currentType;
    for (final String prefix in
        const ["HTML", "SVG", "DOM", "WebKit", "Webkit"]) {
      _currentTypeShort = trimPrefix(_currentTypeShort, prefix);
    }
  }
  return _currentTypeShort;
}
| 252 | |
// Cache for [currentTypeTiny]; null until the getter is first evaluated.
String _currentTypeTiny;

/**
 * [currentTypeShort] after passing it through [trimEnd] with the suffix
 * "Element". The result is computed once and then cached.
 */
String get currentTypeTiny {
  if (_currentTypeTiny != null) return _currentTypeTiny;
  _currentTypeTiny = currentTypeShort;
  _currentTypeTiny = trimEnd(_currentTypeTiny, "Element");
  return _currentTypeTiny;
}
| 261 | |
/** The 'searchResult' entry of the global [data] map. */
Map get searchResult {
  return data['searchResult'];
}

/** The 'link' entry of [searchResult]. */
String get pageUrl {
  return searchResult['link'];
}
| 264 | |
// Cache for [pageDomain]; null until the getter is first evaluated.
String _pageDomain;

/**
 * The part of [pageUrl] that precedes the first "/" found at or after index
 * "https://".length, i.e. the scheme plus host for http(s) URLs. When there
 * is no such slash (for example "https://example.com") the whole URL is
 * returned instead of failing on a negative substring index.
 *
 * The result is computed once and then cached.
 */
String get pageDomain {
  if (_pageDomain == null) {
    // Start searching past the scheme separator so that the slashes in
    // "http://" / "https://" are not mistaken for the start of the path.
    final int pathStart = pageUrl.indexOf("/", "https://".length);
    _pageDomain = pathStart == -1 ? pageUrl : pageUrl.substring(0, pathStart);
  }
  return _pageDomain;
}
| 272 | |
/** [pageUrl] up to and including its last "/". */
String get pageDir => pageUrl.substring(0, pageUrl.lastIndexOf('/') + 1);
| 276 | |
/**
 * Resolves the href of [anchor] against the page being scraped.
 *
 * Returns '' when [anchor] is null or has an empty href. An href that
 * already starts with http:// or https:// is returned unchanged. Otherwise
 * the href is appended to [pageDomain] if it starts with '/', to [pageUrl]
 * if it starts with '#', and to [pageDir] in every other case.
 */
String getAbsoluteUrl(AnchorElement anchor) {
  if (anchor == null) return '';
  final String href = anchor.href;
  if (href.length == 0) return '';

  if (new RegExp("^https?://").hasMatch(href)) return href;
  if (href.startsWith('/')) return "$pageDomain$href";
  if (href.startsWith("#")) return "$pageUrl$href";
  return "$pageDir$href";
}
| 290 | |
/** Whether [n] or one of its ancestors is a [TableElement]. */
bool inTable(Node n) {
  for (Node cursor = n; cursor != null; cursor = cursor.parent) {
    if (cursor is TableElement) return true;
  }
  return false;
}
| 298 | |
/**
 * Escapes [str] for use in HTML by assigning it as the text of a detached
 * div and reading the div's markup back.
 */
String escapeHTML(str) {
  final Element scratch = new Element.tag("div")..text = str;
  return scratch.innerHTML;
}
| 304 | |
/**
 * Collects every [Text] node under [elem], visiting children in order and
 * descending depth first.
 */
List<Text> getAllTextNodes(Element elem) {
  final collected = <Text>[];

  void collect(Node node) {
    if (node is Text) {
      collected.add(node);
      return;
    }
    node.nodes.forEach(collect);
  }

  collect(elem);
  return collected;
}
| 320 | |
/**
 * Whether [n] and all of its descendants are node types that can safely be
 * skipped when they carry no text content. Images, canvases, inputs and
 * objects are never skippable; text nodes always are.
 */
bool isSkippableType(Node n) {
  // TODO(jacobr): are there any types we don't want to skip even if they
  // have no text content?
  final bool alwaysKept = n is ImageElement ||
      n is CanvasElement ||
      n is InputElement ||
      n is ObjectElement;
  if (alwaysKept) return false;
  if (n is Text) return true;

  // Any other node is skippable only if every child is.
  return n.nodes.every((child) => isSkippableType(child));
}
| 341 | |
/**
 * Whether [n] is of a skippable type (see [isSkippableType]) and its text is
 * empty once trimmed.
 */
bool isSkippable(Node n) =>
    isSkippableType(n) && n.text.trim().length == 0;
| 346 | |
/**
 * Serializes [dbEntry] to JSON and posts it to the window, prefixed with a
 * marker string.
 */
void onEnd() {
  // Hideous hack to send JSON back to JS. The placeholder token is swapped
  // for an escaped newline to work around a bug in JSON.decode.
  final String payload = JSON
      .encode(dbEntry)
      .replaceAll("ZDARTIUMDOESNTESCAPESLASHNJXXXX", "\\n");

  // Use postMessage to send the JSON to JavaScript. TODO(jacobr): use a
  // simple isolate based Dart-JS interop solution in the future.
  window.postMessage("START_DART_MESSAGE_UNIQUE_IDENTIFIER$payload", "*");
}
| 357 | |
/** Immutable triple of strings produced by parsing one section. */
class SectionParseResult {
  /** HTML for the section. */
  final String html;

  /** URL associated with the section; may be null. */
  final String url;

  /** IDL markup associated with the section. */
  final String idl;

  SectionParseResult(String html, String url, String idl)
      : this.html = html,
        this.url = url,
        this.idl = idl;
}
| 364 | |
/**
 * Cleans up the DOM under [root] in place and returns the resulting inner
 * HTML passed through JSONFIXUPHACK.
 *
 * The cleanup strips the [DART_REMOVED] marker class and inline styles,
 * removes ".lang.lang-en" tags, drops "Parameters" / "Return value" blocks
 * that carry no useful information, unwraps single-element wrappers and
 * trims skippable nodes from both ends.
 */
String genCleanHtml(Element root) {
  root
      .queryAll(".$DART_REMOVED")
      .forEach((marked) => marked.classes.remove(DART_REMOVED));

  // Ditch inline styles.
  root
      .queryAll('[style]')
      .forEach((styled) => styled.attributes.remove('style'));

  // These elements are just tags that we should suppress.
  root.queryAll(".lang.lang-en").forEach((tag) => tag.remove());

  // Locate the h6 headings of interest; when several match, the last wins.
  Element paramsHeading;
  Element returnHeading;
  for (final heading in root.queryAll("h6")) {
    final String headingText = heading.text;
    if (headingText == 'Parameters') {
      paramsHeading = heading;
    } else if (headingText == 'Return value') {
      returnHeading = heading;
    }
  }

  if (paramsHeading != null) {
    final descriptions = root.queryAll("dd");
    final int blankCount =
        descriptions.where((dd) => dd.text.trim().length == 0).length;
    if (blankCount > 0 && blankCount == descriptions.length) {
      // Every description is empty, so the parameter list adds zero value.
      paramsHeading.remove();
      root.queryAll("dl").forEach((list) => list.remove());
    } else if (descriptions.length == 0) {
      final Element following = paramsHeading.nextElementSibling;
      if (following != null && following.text.trim() == 'None.') {
        // No need to display that the function takes 0 parameters.
        following.remove();
        paramsHeading.remove();
      }
    }
  }

  // Heuristic: if the return value is a single word it is a type name not a
  // useful text description so suppress it.
  if (returnHeading != null) {
    final Element described = returnHeading.nextElementSibling;
    if (described != null &&
        described.text.trim().split(' ').length < 2) {
      described.remove();
      returnHeading.remove();
    }
  }

  // Repeat until a full pass makes no change.
  bool progressed;
  do {
    progressed = false;

    // Descend through wrappers whose only child is a single element.
    while (root.nodes.length == 1 && root.nodes.first is Element) {
      root = root.nodes.first;
      progressed = true;
    }

    // Trim useless nodes from the front.
    while (root.nodes.length > 0 && isSkippable(root.nodes.first)) {
      root.nodes.first.remove();
      progressed = true;
    }

    // Trim useless nodes from the back.
    while (root.nodes.length > 0 && isSkippable(root.nodes.last)) {
      root.nodes.last.remove();
      progressed = true;
    }
  } while (progressed);

  return JSONFIXUPHACK(root.innerHTML);
}
| 448 | |
/**
 * Runs [genCleanHtml] on a deep clone of [e], so the cleanup is applied to
 * the copy rather than to [e] itself.
 */
String genPrettyHtmlFromElement(Element e) => genCleanHtml(e.clone(true));
| 453 | |
/**
 * Iterator that walks DOM nodes in post-order, beginning at the left-most
 * descendant of the start node.
 *
 * The walk is not confined to the start node's subtree: after a node with no
 * next sibling it moves to the parent, and it ends only when that parent is
 * null.
 */
class PostOrderTraversalIterator implements Iterator<Node> {
  Node _next;
  Node _current;

  PostOrderTraversalIterator(Node start)
      : _next = _leftMostDescendent(start);

  Node get current => _current;

  bool get hasNext => _next != null;

  bool moveNext() {
    _current = _next;
    if (_current == null) return false;

    // Visit the next sibling's subtree (deepest-left node first) before the
    // parent; with no sibling left, the parent is next.
    final Node sibling = _current.nextNode;
    _next = sibling != null ? _leftMostDescendent(sibling) : _current.parent;
    return true;
  }

  /** Follows first children from [n] down to a node without children. */
  static Node _leftMostDescendent(Node n) {
    Node deepest = n;
    while (deepest.nodes.length > 0) {
      deepest = deepest.nodes.first;
    }
    return deepest;
  }
}
| 484 | |
/**
 * Iterable view over a node; each call to [iterator] starts a fresh
 * [PostOrderTraversalIterator] at that node.
 */
class PostOrderTraversal extends IterableBase<Node> {
  final Node _node;

  PostOrderTraversal(this._node);

  Iterator<Node> get iterator {
    return new PostOrderTraversalIterator(_node);
  }
}
| 491 | |
/**
 * Estimates which content forms the first rendered line of text inside the
 * [section] range. Returns null when the text of that line does not contain
 * [prop] (after [stripWebkit] has been applied to it).
 *
 * Nodes are measured with [getClientRect]; a visible node whose bottom edge
 * lies more than [MIN_PIXELS_DIFFERENT_LINES] pixels below the reference
 * bottom edge is treated as the start of a later line.
 */
Range findFirstLine(Range section, String prop) {
  final Range line = newRange();
  line.setStart(section.startContainer, section.startOffset);

  // Smallest bottom edge among the visible nodes seen so far; null until the
  // first visible node is measured.
  num referenceBottom;
  for (final node in new PostOrderTraversal(section.startContainer)) {
    final int position = section.comparePoint(node, 0);
    if (position == -1) continue; // Before the range: skip.
    if (position > 0) break; // After the range: done.

    final box = getClientRect(node);
    final num bottom = box.bottom;
    if (box.height > 0 && box.width > 0) {
      if (referenceBottom == null) {
        referenceBottom = bottom;
      } else if (referenceBottom + MIN_PIXELS_DIFFERENT_LINES < bottom) {
        // This node sits clearly lower than the line found so far.
        break;
      } else if (referenceBottom > bottom) {
        referenceBottom = bottom;
      }
    }

    line.setEndAfter(node);
  }

  // If the first line of text in the section does not contain the property
  // name then we're not confident we are able to extract a high accuracy
  // match so we should not return anything.
  return line.toString().contains(stripWebkit(prop)) ? line : null;
}
| 536 | |
/**
 * Returns the first anchor element under [root] whose text contains [prop],
 * or null when no anchor matches.
 */
AnchorElement findAnchorElement(Element root, String prop) {
  return root
      .queryAll("a")
      .firstWhere((a) => a.text.contains(prop), orElse: () => null);
}
| 546 | |
// Walks up from [elem] and returns the first ancestor that has a non-empty
// id not starting with "section_"; such an element is considered a safe
// enough root. The walk stops at [root], which is returned when no ancestor
// qualifies (or when [elem] already is [root]).
Element findTighterRoot(Element elem, Element root) {
  Element tighter = elem;
  while (tighter != root) {
    tighter = tighter.parent;
    final String id = tighter.id;
    if (id.length > 0 && !id.startsWith("section_")) {
      return tighter;
    }
  }
  return tighter;
}
| 558 | |
// TODO(jacobr): this is very slow and ugly.. consider rewriting or at least
// commenting carefully.
//
// Extracts the documentation that starts at [elem]. A range is grown from
// just before [elem] over the following elements until it reaches an element
// marked with DART_REMOVED (once the range already holds some text) or the
// end of the tightened [root]. When [prop] is given, the first rendered line
// is cut off the front of the range and the URL of an anchor on that line
// mentioning [prop] is recorded. Script/object/style elements are dropped,
// IDL-looking <pre> blocks are moved into the idl string, and the remainder
// is cleaned with genCleanHtml. [fragmentGeneratedCallback], if non-null, is
// invoked with the cloned fragment before any of that stripping happens.
SectionParseResult filteredHtml(Element elem, Element root, String prop,
    Function fragmentGeneratedCallback) {
  // Using a tighter root avoids false positives at the risk of trimming
  // text we shouldn't.
  root = findTighterRoot(elem, root);
  final Range section = newRange();
  section.setStartBefore(elem);

  Element cursor = elem;
  while (cursor != null) {
    section.setEndBefore(cursor);
    if (cursor.classes.contains(DART_REMOVED) &&
        section.toString().trim().length > 0) {
      break;
    }

    final Element firstChild = cursor.firstElementChild;
    if (firstChild != null) {
      cursor = firstChild;
      continue;
    }

    // No children: extend the range past this element, then move on to the
    // next sibling, climbing towards the root while there is none.
    while (cursor != null) {
      section.setEndAfter(cursor);
      if (cursor == root) {
        cursor = null;
        break;
      }
      final Element sibling = cursor.nextElementSibling;
      if (sibling != null) {
        cursor = sibling;
        break;
      }
      cursor = cursor.parent;
    }
  }

  String url;
  if (prop != null) {
    final Range firstLine = findFirstLine(section, prop);
    if (firstLine != null) {
      // The first line is not part of the body; start the range after it.
      section.setStart(firstLine.endContainer, firstLine.endOffset);
      final AnchorElement anchor =
          findAnchorElement(firstLine.cloneContents(), prop);
      if (anchor != null) {
        url = getAbsoluteUrl(anchor);
      }
    }
  }

  final fragment = section.cloneContents();
  if (fragmentGeneratedCallback != null) {
    fragmentGeneratedCallback(fragment);
  }

  // Strip tags we don't want
  for (Element unwanted in fragment.queryAll("script, object, style")) {
    unwanted.remove();
  }

  // Extract idl
  final StringBuffer idl = new StringBuffer();
  if (prop != null && prop.length > 0) {
    // Elements matching IDL_SELECTOR are always treated as IDL.
    for (Element block in fragment.queryAll(IDL_SELECTOR)) {
      idl.write(block.outerHTML);
      block.remove();
    }

    // TODO(jacobr) this is a very basic regex to see if text looks like IDL
    final RegExp likelyIdl = new RegExp(" $prop\\w*\\(");
    for (Element pre in fragment.queryAll("pre")) {
      // Check if it looks like idl...
      final String txt = pre.text.trim();
      if (likelyIdl.hasMatch(txt) &&
          txt.contains("\n") &&
          txt.contains(")")) {
        idl.write(pre.outerHTML);
        pre.remove();
      }
    }
  }

  return new SectionParseResult(genCleanHtml(fragment), url, idl.toString());
}
| 636 | |
/**
 * Finds the element under [root] that most plausibly holds the API
 * definition of [prop]. [allText] is the list of all text nodes under root,
 * precomputed by the caller to improve performance.
 *
 * An element whose id equals [prop] (or "prop()" when [propType] is
 * "methods") is preferred; empty placeholder elements are skipped in favour
 * of their next non-empty sibling. Failing that, the parent of the left-most
 * text node whose cleaned-up text equals [prop] is returned, provided it is
 * rendered less than 70 pixels from the left. Returns null if nothing fits.
 */
Element findBest(Element root, List<Text> allText, String prop,
    String propType) {
  // Best bet: find a child of root where the id matches the property name.
  Element cand = root.query("#$prop");
  if (cand == null && propType == "methods") {
    cand = root.query("[id=$prop\\(\\)]");
  }

  // The bookmark for the element may sadly be just an empty placeholder;
  // advance to the first real element.
  while (cand != null && cand.text.trim().length == 0) {
    cand = cand.nextElementSibling;
  }
  if (cand != null) return cand;

  // If we are at least 70 pixels from the left, something is definitely
  // fishy and we shouldn't even consider this candidate as nobody visually
  // formats API docs like that.
  num candLeft = 70;

  for (Text text in allText) {
    // TODO(jacobr): does it hurt precision to use the full cleanup?
    if (fullNameCleanup(text.text) != prop) continue;

    final Element proposed = text.parent;
    final num left = getClientRect(proposed).left;

    // TODO(jacobr): this is a good heuristic
    // if (selObj.selector.indexOf(" > DD ") == -1
    if (left < candLeft) {
      cand = proposed;
      candLeft = left;
    }
  }
  return cand;
}
| 683 | |
/**
 * Checks whether [e] is tagged as obsolete or deprecated using heuristics
 * for what these tags look like in the MDN docs: either a span under [e]
 * whose lower-cased text starts with one of those words, or the lower-cased
 * text of [e] containing one of them as a whitespace-delimited word.
 */
bool isObsolete(Element e) {
  final bool flaggedBySpan = e.queryAll("span").any((span) {
    final String label = span.text.toLowerCase();
    return label.startsWith("obsolete") || label.startsWith("deprecated");
  });
  if (flaggedBySpan) return true;

  final String body = e.text.toLowerCase();
  if (new RegExp(r"(^|\s)obsolete(?=\s|$)").hasMatch(body)) return true;
  return new RegExp(r"(^|\s)deprecated(?=\s|$)").hasMatch(body);
}
| 699 | |
/** Whether [str] begins with a character in the range a-z. */
bool isFirstCharLowerCase(String str) => new RegExp("^[a-z]").hasMatch(str);
| 703 | |
| 704 /** | |
| 705 * Extracts information from a fragment of HTML only searching under the [root] | |
| 706 * html node. [sectionSelector] specifies the query to use to find candidate | |
| 707 * sections of the document to consider (there may be more than one). | |
| 708 * [currentType] specifies the name of the current class. [members] specifies | |
| 709 * the known class members for this class that we are attempting to find | |
| 710 * documentation for. [propType] indicates whether we are searching for | |
| 711 * methods, properties, constants, or constructors. | |
| 712 */ | |
| 713 void scrapeSection(Element root, String sectionSelector, String currentType, | |
| 714 List members, String propType) { | |
| 715 Map expectedProps = dartIdl[propType]; | |
| 716 | |
| 717 Set<String> alreadyMatchedProperties = new Set<String>(); | |
| 718 bool onlyConsiderTables = false; | |
| 719 ElementList allMatches = root.queryAll(sectionSelector); | |
| 720 if (allMatches.length == 0) { | |
| 721 // If we can't find any matches to the sectionSelector, we fall back to | |
| 722 // considering all tables in the document. This is dangerous so we only | |
| 723 // allow the safer table matching extraction rules for this case. | |
| 724 allMatches = root.queryAll(".fullwidth-table"); | |
| 725 onlyConsiderTables = true; | |
| 726 } | |
| 727 for (Element matchElement in allMatches) { | |
| 728 final match = matchElement.parent; | |
| 729 if (!match.id.startsWith("section") && match.id != "pageText") { | |
| 730 throw "Unexpected element $match"; | |
| 731 } | |
| 732 // We don't want to later display this text a second time while for example | |
| 733 // displaying class level summary information as then we would display | |
| 734 // the same documentation twice. | |
| 735 match.classes.add(DART_REMOVED); | |
| 736 | |
| 737 bool foundProps = false; | |
| 738 | |
| 739 // TODO(jacobr): we should really look for the table tag instead | |
| 740 // add an assert if we are missing something that is a table... | |
| 741 // TODO(jacobr) ignore tables in tables. | |
| 742 for (Element t in match.queryAll('.standard-table, .fullwidth-table')) { | |
| 743 int helpIndex = -1; | |
| 744 num i = 0; | |
| 745 for (Element r in t.queryAll("th, td.header")) { | |
| 746 final txt = r.text.trim().split(" ")[0].toLowerCase(); | |
| 747 if (txt == "description") { | |
| 748 helpIndex = i; | |
| 749 break; | |
| 750 } | |
| 751 i++; | |
| 752 } | |
| 753 | |
| 754 // Figure out which column in the table contains member names by | |
| 755 // tracking how many member names each column contains. | |
| 756 final numMatches = new List<int>(i); | |
| 757 for (int j = 0; j < i; j++) { | |
| 758 numMatches[j] = 0; | |
| 759 } | |
| 760 | |
| 761 // Find the column that seems to have the most names that look like | |
| 762 // expected properties. | |
| 763 for (Element r in t.queryAll("tbody tr")) { | |
| 764 ElementList row = r.elements; | |
| 765 if (row.length == 0 || row.first.classes.contains(".header")) { | |
| 766 continue; | |
| 767 } | |
| 768 | |
| 769 for (int k = 0; k < numMatches.length && k < row.length; k++) { | |
| 770 if (expectedProps.containsKey(fullNameCleanup(row[k].text))) { | |
| 771 numMatches[k]++; | |
| 772 break; | |
| 773 } | |
| 774 } | |
| 775 } | |
| 776 | |
| 777 int propNameIndex = 0; | |
| 778 { | |
| 779 int bestCount = numMatches[0]; | |
| 780 for (int k = 1; k < numMatches.length; k++) { | |
| 781 if (numMatches[k] > bestCount) { | |
| 782 bestCount = numMatches[k]; | |
| 783 propNameIndex = k; | |
| 784 } | |
| 785 } | |
| 786 } | |
| 787 | |
| 788 for (Element r in t.queryAll("tbody tr")) { | |
| 789 final row = r.elements; | |
| 790 if (row.length > propNameIndex && row.length > helpIndex) { | |
| 791 if (row.first.classes.contains(".header")) { | |
| 792 continue; | |
| 793 } | |
| 794 // TODO(jacobr): this code for determining the namestr is needlessly | |
| 795 // messy. | |
| 796 final nameRow = row[propNameIndex]; | |
| 797 AnchorElement a = nameRow.query("a"); | |
| 798 String goodName = ''; | |
| 799 if (a != null) { | |
| 800 goodName = a.text.trim(); | |
| 801 } | |
| 802 String nameStr = nameRow.text; | |
| 803 | |
| 804 Map entry = new Map<String, String>(); | |
| 805 | |
| 806 entry["name"] = fullNameCleanup(nameStr.length > 0 ? | |
| 807 nameStr : goodName); | |
| 808 | |
| 809 final parse = filteredHtml(nameRow, nameRow, entry["name"], null); | |
| 810 String altHelp = parse.html; | |
| 811 | |
| 812 entry["help"] = (helpIndex == -1 || row[helpIndex] == null) ? | |
| 813 altHelp : genPrettyHtmlFromElement(row[helpIndex]); | |
| 814 if (parse.url != null) { | |
| 815 entry["url"] = parse.url; | |
| 816 } | |
| 817 | |
| 818 if (parse.idl.length > 0) { | |
| 819 entry["idl"] = parse.idl; | |
| 820 } | |
| 821 | |
| 822 entry["obsolete"] = isObsolete(r); | |
| 823 | |
| 824 if (entry["name"].length > 0) { | |
| 825 cleanupEntry(members, entry); | |
| 826 alreadyMatchedProperties.add(entry['name']); | |
| 827 foundProps = true; | |
| 828 } | |
| 829 } | |
| 830 } | |
| 831 } | |
| 832 | |
| 833 if (onlyConsiderTables) { | |
| 834 continue; | |
| 835 } | |
| 836 | |
| 837 // After this point we have higher risk tests that attempt to perform | |
| 838 // rudimentary page segmentation. This approach is much more error-prone | |
| 839 // than using tables because the HTML is far less clearly structured. | |
| 840 | |
| 841 final allText = getAllTextNodes(match); | |
| 842 | |
| 843 final pmap = new Map<String, Element>(); | |
| 844 for (final prop in expectedProps.keys) { | |
| 845 if (alreadyMatchedProperties.contains(prop)) { | |
| 846 continue; | |
| 847 } | |
| 848 final e = findBest(match, allText, prop, propType); | |
| 849 if (e != null && !inTable(e)) { | |
| 850 pmap[prop] = e; | |
| 851 } | |
| 852 } | |
| 853 | |
| 854 for (final prop in pmap.keys) { | |
| 855 pmap[prop].classes.add(DART_REMOVED); | |
| 856 } | |
| 857 | |
| 858 // The problem is the MDN docs do place documentation for each method in a | |
| 859 // nice self contained subtree. Instead you will see something like: | |
| 860 | |
| 861 // <h3>drawImage</h3> | |
| 862 // <p>Draw image is an awesome method</p> | |
| 863 // some more info on drawImage here | |
| 864 // <h3>mozDrawWindow</h3> | |
| 865 // <p>This API cannot currently be used by Web content. | |
| 866 // It is chrome only.</p> | |
| 867 // <h3>drawRect</h3> | |
| 868 // <p>Always call drawRect instead of drawImage</p> | |
| 869 // some more info on drawRect here... | |
| 870 | |
| 871 // The trouble is we will easily detect that the drawImage and drawRect | |
| 872 // entries are method definitions because we know to search for these | |
| 873 // method names but we will not detect that mozDrawWindow is a method | |
| 874 // definition as that method doesn't exist in our IDL. Thus if we are not | |
| 875 // careful the definition for the drawImage method will contain the | |
| 876 // definition for the mozDrawWindow method as well which would result in | |
| 877 // broken docs. We solve this problem by finding all content with similar | |
| 878 // visual structure to the already found method definitions. It turns out | |
| 879 // that using the visual position of each element on the page is much | |
| 880 // more reliable than using the DOM structure | |
| 881 // (e.g. section_root > div > h3) for the MDN docs because MDN authors | |
| 882 // carefully check that the documentation for each method comment is | |
| 883 // visually consistent but take less care to check that each | |
| 884 // method comment has identical markup structure. | |
| 885 for (String prop in pmap.keys) { | |
| 886 Element e = pmap[prop]; | |
| 887 ClientRect r = getClientRect(e); | |
| 888 // TODO(jacobr): a lot of these queries are identical and this code | |
| 889 // could easily be optimized. | |
| 890 for (final cand in match.queryAll(e.tagName)) { | |
| 891 // TODO(jacobr): use a negative selector instead. | |
| 892 if (!cand.classes.contains(DART_REMOVED) && !inTable(cand)) { | |
| 893 final candRect = getClientRect(cand); | |
| 894 // Only consider matches that have similar heights and identical left | |
| 895 // coordinates. | |
| 896 if (candRect.left == r.left && | |
| 897 (candRect.height - r.height).abs() < 5) { | |
| 898 String propName = fullNameCleanup(cand.text); | |
| 899 if (isFirstCharLowerCase(propName) && !pmap.containsKey(propName) | |
| 900 && !alreadyMatchedProperties.contains(propName)) { | |
| 901 pmap[propName] = cand; | |
| 902 } | |
| 903 } | |
| 904 } | |
| 905 } | |
| 906 } | |
| 907 | |
| 908 // We mark these elements in batch to reduce the number of layouts | |
| 909 // triggered. TODO(jacobr): use new batch based async measurement to make | |
| 910 // this code flow simpler. | |
| 911 for (String prop in pmap.keys) { | |
| 912 Element e = pmap[prop]; | |
| 913 e.classes.add(DART_REMOVED); | |
| 914 } | |
| 915 | |
| 916 // Find likely "subsections" of the main section and mark them with | |
| 917 // DART_REMOVED so we don't include them in member descriptions... which | |
| 918 // would suck. | |
| 919 for (Element e in match.queryAll("[id]")) { | |
| 920 if (e.id.contains(matchElement.id)) { | |
| 921 e.classes.add(DART_REMOVED); | |
| 922 } | |
| 923 } | |
| 924 | |
| 925 for (String prop in pmap.keys) { | |
| 926 Element elem = pmap[prop]; | |
| 927 bool obsolete = false; | |
| 928 final parse = filteredHtml( | |
| 929 elem, match, prop, | |
| 930 (Element e) { | |
| 931 obsolete = isObsolete(e); | |
| 932 }); | |
| 933 Map entry = { | |
| 934 "url" : parse.url, | |
| 935 "name" : prop, | |
| 936 "help" : parse.html, | |
| 937 "obsolete" : obsolete | |
| 938 }; | |
| 939 if (parse.idl.length > 0) { | |
| 940 entry["idl"] = parse.idl; | |
| 941 } | |
| 942 cleanupEntry(members, entry); | |
| 943 } | |
| 944 } | |
| 945 } | |
| 946 | |
/**
 * Placeholder for HTML trimming; currently returns [html] unchanged.
 */
// TODO(jacobr): implement this. Remove spurious enclosing HTML tags, etc.
String trimHtml(String html) => html;
| 951 | |
/**
 * Returns true when [name] looks like a member name: either a lowercase
 * letter followed by at least one more ASCII letter or digit, or an
 * uppercase letter followed by any number of uppercase letters and
 * underscores.
 */
bool maybeName(String name) {
  final RegExp memberLike = new RegExp(r"^[a-z][a-z0-9A-Z]+$");
  if (memberLike.hasMatch(name)) return true;

  final RegExp constantLike = new RegExp(r"^[A-Z][A-Z_]*$");
  return constantLike.hasMatch(name);
}
| 956 | |
/**
 * Adds the [DART_REMOVED] class to [e], which may be null (ignored), a single
 * [Element], or an iterable of elements.
 */
// TODO(jacobr): this element is ugly at the moment but will become easier to
// read once ElementList supports most of the Element functionality.
void markRemoved(var e) {
  if (e == null) return;

  if (e is Element) {
    e.classes.add(DART_REMOVED);
    return;
  }

  // Anything else is treated as a collection of elements.
  for (Element item in e) {
    item.classes.add(DART_REMOVED);
  }
}
| 970 | |
/**
 * Replaces every newline in [value] with a sentinel token.
 */
// TODO(jacobr): remove this when the dartium JSON parse handles \n correctly.
String JSONFIXUPHACK(String value) =>
    value.replaceAll("\n", "ZDARTIUMDOESNTESCAPESLASHNJXXXX");
| 975 | |
/**
 * Rewrites a leading "moz" in [name] to "webkit"; other names are returned
 * unchanged.
 */
String mozToWebkit(String name) {
  final RegExp leadingMoz = new RegExp("^moz");
  return name.replaceFirst(leadingMoz, "webkit");
}
| 979 | |
/**
 * Removes a leading "webkit" from [name] using [trimPrefix].
 */
String stripWebkit(String name) => trimPrefix(name, "webkit");
| 983 | |
/**
 * Aggressively reduces [name] to a bare member name: everything from the
 * first "(" onwards is dropped, then everything from the first space,
 * newline, tab and "*" onwards, and the result is passed through
 * [safeNameCleanup].
 */
// TODO(jacobr): be more principled about this.
String fullNameCleanup(String name) {
  final int openParen = name.indexOf('(');
  if (openParen != -1) {
    name = name.substring(0, openParen);
  }
  // Keep only the text before each separator, applied in this order.
  for (String separator in [" ", "\n", "\t", "*"]) {
    name = name.split(separator)[0];
  }
  return safeNameCleanup(name.trim());
}
| 998 | |
/**
 * Less aggressive than the full name cleanup to avoid overeager matching.
 *
 * Drops a parenthesised suffix (only when both "(" and ")" are present),
 * strips a leading "<type>." qualifier for each spelling of the current type
 * name, and maps a "moz" prefix to "webkit".
 */
// TODO(jacobr): be more principled about this.
String safeNameCleanup(String name) {
  final int openParen = name.indexOf('(');
  if (openParen != -1 && name.indexOf(")") != -1) {
    // TODO(jacobr): workaround bug in:
    // name = name.split("(")[0];
    name = name.substring(0, openParen);
  }
  name = name.trim();

  // For each spelling of the type name try the exact-case qualifier first,
  // then the lowercase one.
  for (String typeName in [currentType, currentTypeShort, currentTypeTiny]) {
    name = trimPrefix(name, typeName + ".");
    name = trimPrefix(name, typeName.toLowerCase() + ".");
  }

  return mozToWebkit(name.trim());
}
| 1019 | |
/**
 * Removes every h1, h2, and h3 element found under [fragment].
 */
void removeHeaders(DocumentFragment fragment) {
  fragment.queryAll("h1, h2, h3").forEach((Element header) {
    header.remove();
  });
}
| 1028 | |
/**
 * Given an [entry] representing a single method or property cleanup the
 * values performing some simple normalization and only adding the entry to
 * [members] if it has a valid name.
 *
 * The 'help' value (when present) is passed through [trimHtml] and the 'name'
 * value through [fullNameCleanup]. If the cleaned name satisfies [maybeName],
 * keys with null values are removed, string values are passed through
 * [JSONFIXUPHACK], and [entry] is appended to [members]. [entry] is modified
 * in place.
 */
void cleanupEntry(List members, Map entry) {
  if (entry.containsKey('help')) {
    entry['help'] = trimHtml(entry['help']);
  }
  String name = fullNameCleanup(entry['name']);
  entry['name'] = name;
  if (!maybeName(name)) {
    return;
  }

  // Iterate over a snapshot of the keys: removing entries from a map while
  // iterating its live key view raises a ConcurrentModificationError.
  for (String key in entry.keys.toList()) {
    var value = entry[key];
    if (value == null) {
      entry.remove(key);
    } else if (value is String) {
      entry[key] = JSONFIXUPHACK(value);
    }
  }
  members.add(entry);
}
| 1054 | |
/**
 * Returns [str] without its leading [prefix], or [str] unchanged when it does
 * not start with [prefix]. Unlike [trimStart], a string that is exactly
 * [prefix] becomes the empty string.
 */
// TODO(jacobr) dup with trim start....
String trimPrefix(String str, String prefix) {
  return str.startsWith(prefix) ? str.substring(prefix.length) : str;
}
| 1063 | |
/**
 * Returns [str] without its leading [start], but only when [str] is strictly
 * longer than [start]; otherwise [str] is returned unchanged.
 */
String trimStart(String str, String start) {
  final bool hasRemainder = str.length > start.length;
  if (!hasRemainder || !str.startsWith(start)) {
    return str;
  }
  return str.substring(start.length);
}
| 1070 | |
/**
 * Returns [str] without its trailing [end], but only when [str] is strictly
 * longer than [end]; otherwise [str] is returned unchanged.
 */
String trimEnd(String str, String end) {
  final bool hasRemainder = str.length > end.length;
  if (!hasRemainder || !str.endsWith(end)) {
    return str;
  }
  return str.substring(0, str.length - end.length);
}
| 1077 | |
/**
 * Extract a section with name [key] using [selector] to find start points for
 * the section in the document.
 *
 * For each match the parent element is used as the section: its h1, h2 and
 * IDL blocks are removed, its filtered HTML is appended to `dbEntry[key]`,
 * and the section is tagged with [DART_REMOVED].
 */
void extractSection(String selector, String key) {
  for (Element anchor in document.queryAll(selector)) {
    final Element section = anchor.parent;
    for (Element skip in section.queryAll("h1, h2, $IDL_SELECTOR")) {
      skip.remove();
    }

    final String html =
        filteredHtml(section, section, null, removeHeaders).html;
    if (html.length > 0) {
      // Concatenate when several sections map to the same key.
      dbEntry[key] = dbEntry.containsKey(key) ? dbEntry[key] + html : html;
    }
    section.classes.add(DART_REMOVED);
  }
}
| 1099 | |
/**
 * Scrapes the currently loaded page into [dbEntry] and then calls [onEnd].
 *
 * Steps, in order:
 *  * inject CSS that prevents line wrapping and record the page title;
 *  * remove ".htmlVersionHeaderTemplate" boxes and build the case-insensitive
 *    [domTypes] lookup from [domTypesRaw];
 *  * rewrite the href of every anchor (relative links become absolute MDN
 *    links, links to known DOM types are pointed at "../html/<type>.html");
 *  * give up with `skipped`/`cause` set when the title does not look like it
 *    belongs to the current type, or with `cause` set when ".pageText" is
 *    missing;
 *  * scrape methods, constants and properties, then the "see also",
 *    specification, constructor and compatibility sections, the examples and
 *    finally the class summary.
 *
 * [onEnd] is called on every exit path.
 */
void run() {
  // Inject CSS to ensure lines don't wrap unless they were intended to.
  // This is needed to make the logic to determine what is a single line
  // behave consistently even for very long method names.
  document.head.nodes.add(new Element.html("""
<style type="text/css">
  body {
    width: 10000px;
  }
</style>"""));

  String title = trimEnd(window.document.title.trim(), " - MDN");
  dbEntry['title'] = title;

  // TODO(rnystrom): Clean up the page a bunch. Not sure if this is the best
  // place to do this...
  // TODO(jacobr): move this to right before we extract HTML.

  // Remove the "Introduced in HTML <version>" boxes.
  for (Element e in document.queryAll('.htmlVersionHeaderTemplate')) {
    e.remove();
  }

  // Flatten the list of known DOM types into a faster and case-insensitive
  // map.
  domTypes = {};
  for (final domType in domTypesRaw) {
    domTypes[domType.toLowerCase()] = domType;
  }

  // Fix up links.
  final SHORT_LINK = new RegExp(r'^[\w/]+$');
  final INNER_LINK = new RegExp(r'[Ee]n/(?:[\w/]+/|)([\w#.]+)(?:\(\))?$');
  final MEMBER_LINK = new RegExp(r'(\w+)[.#](\w+)');
  // The dots are escaped so that only literal "../" segments are accepted as
  // the relative prefix (an unescaped "." matches any character).
  final RELATIVE_LINK = new RegExp(r'^(?:\.\./)*/?[Ee][Nn]/(.+)');

  // - Make relative links absolute.
  // - If we can, take links that point to other MDN pages and retarget them
  //   to appropriate pages in our docs.
  // TODO(rnystrom): Add rel external to links we didn't fix.
  for (AnchorElement a in document.queryAll('a')) {
    // Get the raw attribute because we *don't* want the browser to fully-
    // qualify the name for us since it has the wrong base address for the
    // page.
    var href = a.attributes['href'];

    // Ignore busted links.
    if (href == null) continue;

    // If we can recognize what it's pointing to, point it to our page instead.
    tryToLinkToRealType(maybeType) {
      // See if we know a type with that name.
      final realType = domTypes[maybeType.toLowerCase()];
      if (realType != null) {
        href = '../html/$realType.html';
      }
    }

    // If it's a relative link (that we know how to root), make it absolute.
    var match = RELATIVE_LINK.firstMatch(href);
    if (match != null) {
      href = 'https://developer.mozilla.org/en/${match[1]}';
    }

    // If it's a word link like "foo" find a type or make it absolute.
    match = SHORT_LINK.firstMatch(href);
    if (match != null) {
      href = 'https://developer.mozilla.org/en/DOM/${match[0]}';
    }

    // TODO(rnystrom): This is a terrible way to do this. Should use the real
    // mapping from DOM names to html class names that we use elsewhere in the
    // DOM scripts.
    match = INNER_LINK.firstMatch(href);
    if (match != null) {
      // See if we're linking to a member ("type.name" or "type#name") or just
      // a type ("type").
      final member = MEMBER_LINK.firstMatch(match[1]);
      if (member != null) {
        tryToLinkToRealType(member[1]);
      } else {
        tryToLinkToRealType(match[1]);
      }
    }

    // Put it back into the element.
    a.attributes['href'] = href;
  }

  if (!title.toLowerCase().contains(currentTypeTiny.toLowerCase())) {
    bool foundMatch = false;
    // Test out if the title is really an HTML tag that matches the
    // current class name.
    for (String tag in [title.split(" ")[0], title.split(".").last]) {
      try {
        Element element = new Element.tag(tag);
        // TODO(jacobr): this is a really ugly way of doing this that will
        // stop working at some point soon.
        if (element.typeName == currentType) {
          foundMatch = true;
          break;
        }
      } catch (e) {
        // Deliberately ignored: a candidate that cannot be turned into an
        // element simply does not count as a match.
      }
    }
    if (!foundMatch) {
      dbEntry['skipped'] = true;
      dbEntry['cause'] = "Suspect title";
      onEnd();
      return;
    }
  }

  Element root = document.query(".pageText");
  if (root == null) {
    dbEntry['cause'] = '.pageText not found';
    onEnd();
    return;
  }

  markRemoved(root.query("#Notes"));
  List members = dbEntry['members'];

  // This is a laundry list of CSS selectors for boilerplate content on the
  // MDN pages that we should ignore for the purposes of extracting
  // documentation.
  markRemoved(document.queryAll(".pageToc, footer, header, #nav-toolbar"));
  markRemoved(document.queryAll("#article-nav"));
  markRemoved(document.queryAll(".hideforedit"));
  markRemoved(document.queryAll(".navbox"));
  markRemoved(document.query("#Method_overview"));
  markRemoved(document.queryAll("h1, h2"));

  scrapeSection(root, "#Methods", currentType, members, 'methods');
  scrapeSection(root, "#Constants, #Error_codes, #State_constants",
      currentType, members, 'constants');
  // TODO(jacobr): infer tables based on multiple matches rather than
  // using a hard coded list of section ids.
  scrapeSection(root,
      "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, " +
      "#DOM_properties, #Event_handlers, #Event_Handlers",
      currentType, members, 'properties');

  // Avoid doing this till now to avoid messing up the section scrape.
  markRemoved(document.queryAll("h3"));

  // Collected before the extractSection calls below modify the document.
  ElementList examples = root.queryAll("span[id^=example], span[id^=Example]");

  extractSection("#See_also", 'seeAlso');
  extractSection("#Specification, #Specifications", "specification");

  // TODO(jacobr): actually extract the constructor(s)
  extractSection("#Constructor, #Constructors", 'constructor');
  extractSection("#Browser_compatibility, #Compatibility", 'compatibility');

  // Extract examples.
  List<String> exampleHtml = [];
  for (Element e in examples) {
    e.classes.add(DART_REMOVED);
  }
  for (Element e in examples) {
    String html = filteredHtml(e, root, null,
        (DocumentFragment fragment) {
          removeHeaders(fragment);
          if (fragment.text.trim().toLowerCase() == "example") {
            // Degenerate example.
            fragment.nodes.clear();
          }
        }).html;
    if (html.length > 0) {
      exampleHtml.add(html);
    }
  }
  if (exampleHtml.length > 0) {
    dbEntry['examples'] = exampleHtml;
  }

  // Extract the class summary.
  // Basically everything left over after the #Summary or #Description tag is
  // safe to include in the summary.
  StringBuffer summary = new StringBuffer();
  for (Element e in root.queryAll("#Summary, #Description")) {
    summary.write(filteredHtml(root, e, null, removeHeaders).html);
  }

  if (summary.length == 0) {
    // Remove the "Gecko DOM Reference text"
    Element ref = root.query(".lang.lang-en");
    if (ref != null) {
      ref = ref.parent;
      String refText = ref.text.trim();
      if (refText == "Gecko DOM Reference" ||
          refText == "« Gecko DOM Reference") {
        ref.remove();
      }
    }
    // Risky... this might add stuff we shouldn't.
    summary.write(filteredHtml(root, root, null, removeHeaders).html);
  }

  if (summary.length > 0) {
    dbEntry['summary'] = summary.toString();
  }

  // Inject CSS to aid debugging in the browser.
  // We could avoid doing this if we know we are not running in a browser..
  document.head.nodes.add(new Element.html(DEBUG_CSS));

  onEnd();
}
| 1309 | |
/**
 * Entry point: registers [documentLoaded] as a handler for the window's load
 * event.
 */
void main() {
  final onLoad = window.on.load;
  onLoad.add(documentLoaded);
}
| 1313 | |
/**
 * Load handler: requests "<window.location>.json", decodes the response into
 * [data], resets [dbEntry], and then starts [run].
 */
void documentLoaded(event) {
  // Load the database of expected methods and properties with an HttpRequest.
  final String dataUrl = '${window.location}.json';
  new HttpRequest.get(dataUrl, (req) {
    data = JSON.decode(req.responseText);
    dbEntry = {'members': [], 'srcUrl': pageUrl};
    run();
  });
}
| OLD | NEW |