| OLD | NEW |
| (Empty) |
| 1 // Copyright (C) 2006 Google Inc. | |
| 2 // | |
| 3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 4 // you may not use this file except in compliance with the License. | |
| 5 // You may obtain a copy of the License at | |
| 6 // | |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 8 // | |
| 9 // Unless required by applicable law or agreed to in writing, software | |
| 10 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 12 // See the License for the specific language governing permissions and | |
| 13 // limitations under the License. | |
| 14 | |
| 15 | |
| 16 /** | |
| 17 * @fileoverview | |
| 18 * some functions for browser-side pretty printing of code contained in html. | |
| 19 * | |
| 20 * <p> | |
| 21 * For a fairly comprehensive set of languages see the | |
| 22 * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a> | |
| 23 * file that came with this source. At a minimum, the lexer should work on a | |
| 24 * number of languages including C and friends, Java, Python, Bash, SQL, HTML, | |
| 25 * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk | |
| 26 * and a subset of Perl, but, because of commenting conventions, doesn't work on | |
| 27 * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class. | |
| 28 * <p> | |
| 29 * Usage: <ol> | |
| 30 * <li> include this source file in an html page via | |
| 31 * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>} | |
| 32 * <li> define style rules. See the example page for examples. | |
| 33 * <li> mark the {@code <pre>} and {@code <code>} tags in your source with | |
| 34 * {@code class=prettyprint.} | |
| 35 * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty | |
| 36 * printer needs to do more substantial DOM manipulations to support that, so | |
| 37 * some css styles may not be preserved. | |
| 38 * </ol> | |
| 39 * That's it. I wanted to keep the API as simple as possible, so there's no | |
| 40 * need to specify which language the code is in, but if you wish, you can add | |
| 41 * another class to the {@code <pre>} or {@code <code>} element to specify the | |
| 42 * language, as in {@code <pre class="prettyprint lang-java">}. Any class that | |
| 43 * starts with "lang-" followed by a file extension, specifies the file type. | |
| 44 * See the "lang-*.js" files in this directory for code that implements | |
| 45 * per-language file handlers. | |
| 46 * <p> | |
| 47 * Change log:<br> | |
| 48 * cbeust, 2006/08/22 | |
| 49 * <blockquote> | |
| 50 * Java annotations (start with "@") are now captured as literals ("lit") | |
| 51 * </blockquote> | |
| 52 * @requires console | |
| 53 */ | |
| 54 | |
| 55 // JSLint declarations | |
| 56 /*global console, document, navigator, setTimeout, window, define */ | |
| 57 | |
/**
 * Compile-time flag.
 * NOTE(review): nothing in this part of the file reads it; presumably it
 * selects whether the entry points below are exported as globals -- confirm
 * against the code that consumes it.
 * @define {boolean}
 */
var IN_GLOBAL_SCOPE = true;

/**
 * When {@code true}, {@code prettyPrint} splits its work across multiple
 * timeouts so that it does not interfere with UI events.
 * When {@code false}, {@code prettyPrint()} runs synchronously.
 * The property name is quoted so that it survives property renaming.
 */
window['PR_SHOULD_USE_CONTINUATION'] = true;

/**
 * Pretty prints a chunk of code.
 * @param {string} sourceCodeHtml The HTML to pretty print.
 * @param {string} opt_langExtension The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param {number|boolean} opt_numberLines True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} code as html, but prettier
 */
var prettyPrintOne;

/**
 * Finds all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettifies them.
 *
 * @param {Function} opt_whenDone called when prettifying is done.
 * @param {HTMLElement|HTMLDocument} opt_root an element or document
 *     containing all the elements to pretty print.
 *     Defaults to {@code document.body}.
 */
var prettyPrint;
| 88 | |
| 89 | |
| 90 (function () { | |
| 91 var win = window; | |
// Keyword lists for various languages.
// Every list is either a comma-separated string or an array whose elements
// coerce (through Array.prototype.toString) to such a string. Nesting the
// arrays keeps the lists compact when minified and defeats aggressive
// optimizers that fold large string constants.
var FLOW_CONTROL_KEYWORDS = ["break,continue,do,else,for,if,return,while"];
var C_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "auto,case,char,const,default," +
    "double,enum,extern,float,goto,inline,int,long,register,short,signed," +
    "sizeof,static,struct,switch,typedef,union,unsigned,void,volatile"];
var COMMON_KEYWORDS = [C_KEYWORDS, "catch,class,delete,false,import," +
    "new,operator,private,protected,public,this,throw,true,try,typeof"];
var CPP_KEYWORDS = [COMMON_KEYWORDS, "alignof,align_union,asm,axiom,bool," +
    "concept,concept_map,const_cast,constexpr,decltype,delegate," +
    "dynamic_cast,explicit,export,friend,generic,late_check," +
    "mutable,namespace,nullptr,property,reinterpret_cast,static_assert," +
    "static_cast,template,typeid,typename,using,virtual,where"];
var JAVA_KEYWORDS = [COMMON_KEYWORDS,
    "abstract,assert,boolean,byte,extends,final,finally,implements,import," +
    "instanceof,interface,null,native,package,strictfp,super,synchronized," +
    "throws,transient"];
var CSHARP_KEYWORDS = [JAVA_KEYWORDS,
    "as,base,by,checked,decimal,delegate,descending,dynamic,event," +
    "fixed,foreach,from,group,implicit,in,internal,into,is,let," +
    "lock,object,out,override,orderby,params,partial,readonly,ref,sbyte," +
    "sealed,stackalloc,string,select,uint,ulong,unchecked,unsafe,ushort," +
    "var,virtual,where"];
var COFFEE_KEYWORDS = "all,and,by,catch,class,else,extends,false,finally," +
    "for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then," +
    "throw,true,try,unless,until,when,while,yes";
var JSCRIPT_KEYWORDS = [COMMON_KEYWORDS,
    "debugger,eval,export,function,get,null,set,undefined,var,with," +
    "Infinity,NaN"];
var PERL_KEYWORDS = "caller,delete,die,do,dump,elsif,eval,exit,foreach,for," +
    "goto,if,import,last,local,my,next,no,our,print,package,redo,require," +
    "sub,undef,unless,until,use,wantarray,while,BEGIN,END";
var PYTHON_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "and,as,assert,class,def,del," +
    "elif,except,exec,finally,from,global,import,in,is,lambda," +
    "nonlocal,not,or,pass,print,raise,try,with,yield," +
    "False,True,None"];
var RUBY_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "alias,and,begin,case,class," +
    "def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo," +
    "rescue,retry,self,super,then,true,undef,unless,until,when,yield," +
    "BEGIN,END"];
var RUST_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "as,assert,const,copy,drop," +
    "enum,extern,fail,false,fn,impl,let,log,loop,match,mod,move,mut,priv," +
    "pub,pure,ref,self,static,struct,true,trait,type,unsafe,use"];
var SH_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "case,done,elif,esac,eval,fi," +
    "function,in,local,set,then,until"];
// Union of the lists above. COFFEE_KEYWORDS and RUST_KEYWORDS are not part
// of it; JAVA_KEYWORDS is included indirectly through CSHARP_KEYWORDS.
var ALL_KEYWORDS = [
    CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
    PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS];
// Prefix pattern for common C/C++ type names (containers, iterators and
// sized numeric types such as uint32). The literal must stay on one line:
// a regular expression literal cannot contain a line break.
var C_TYPES = /^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float)\d*)\b/;
| 142 | |
// Token style names. Each value is the css class applied to tokens of that
// kind.

/** Style for string literals. @const */
var PR_STRING = 'str';
/** Style for keywords. @const */
var PR_KEYWORD = 'kwd';
/** Style for comments. @const */
var PR_COMMENT = 'com';
/** Style for type names. @const */
var PR_TYPE = 'typ';
/** Style for literal values, e.g. 1, null, true. @const */
var PR_LITERAL = 'lit';
/** Style for punctuation strings. @const */
var PR_PUNCTUATION = 'pun';
/** Style for plain text. @const */
var PR_PLAIN = 'pln';

/** Style for sgml tags. @const */
var PR_TAG = 'tag';
/** Style for markup declarations such as a DOCTYPE. @const */
var PR_DECLARATION = 'dec';
/** Style for embedded source. @const */
var PR_SOURCE = 'src';
/** Style for sgml attribute names. @const */
var PR_ATTRIB_NAME = 'atn';
/** Style for sgml attribute values. @const */
var PR_ATTRIB_VALUE = 'atv';

/**
 * Class marking a section of markup that is not code, e.g. to allow
 * embedding of line numbers within code listings.
 * @const
 */
var PR_NOCODE = 'nocode';
| 212 | |
| 213 | |
| 214 | |
/**
 * Regular expression source for the set of tokens that can precede a regular
 * expression literal in javascript, followed by optional white-space.
 * http://web.archive.org/web/20070717142515/http://www.mozilla.org/js/language/js20/rationale/syntax.html
 * has the full list, but the ones that might be problematic when seen in
 * languages that don't support regular expression literals were removed.
 *
 * <p>Specifically, keywords that can't precede a regexp literal in a
 * syntactically legal javascript program are left out, and so is the "in"
 * keyword, since it's not a keyword in many languages and might be used as
 * a count of inches.
 *
 * <p>The link above does not accurately describe EcmaScript rules since
 * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
 * very well in practice.
 *
 * <p>The pieces below concatenate to one pattern; a string literal cannot
 * span source lines, so long runs are joined with {@code +}.
 *
 * @private
 * @const
 */
var REGEXP_PRECEDER_PATTERN =
    '(?:^^\\.?|[+-]|[!=]=?=?|\\#|%=?|&&?=?|\\(|\\*=?|[+\\-]=|->|\\/=?|::?|' +
    '<<?=?|>>?>?=?|,|;|\\?|@|\\[|~|{|\\^\\^?=?|\\|\\|?=?|' +
    'break|case|continue|delete|do|else|finally|instanceof|return|throw|' +
    'try|typeof' +
    ')\\s*';
| 235 | |
| 236 // CAVEAT: this does not properly handle the case where a regular | |
| 237 // expression immediately follows another since a regular expression may | |
| 238 // have flags for case-sensitivity and the like. Having regexp tokens | |
| 239 // adjacent is not valid in any language I'm aware of, so I'm punting. | |
| 240 // TODO: maybe style special characters inside a regexp as punctuation. | |
| 241 | |
/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExp.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 *
 * <p>Each input is rewritten so that it can live inside one alternation:
 * leading anchors are dropped, capturing groups that are never
 * back-referenced become non-capturing, the remaining groups and their
 * back-references are renumbered, and, when case-sensitive and
 * case-insensitive inputs are mixed, the case-insensitive ones have their
 * latin letters expanded to explicit two-letter character sets.
 *
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any input is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Number of capturing groups emitted so far across all rewritten inputs.
  var capturedGroupIndex = 0;

  // The combined regex can carry the "i" flag only when no case-sensitive
  // input contains a letter. Otherwise case-insensitive inputs are folded
  // by hand and the output stays case-sensitive.
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
        /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  var escapeCharToCodeUnit = {
    'b': 8,
    't': 9,
    'n': 0xa,
    'v': 0xb,
    'f': 0xc,
    'r': 0xd
  };

  /**
   * @param {string} charsetPart one literal character or escape sequence
   *     taken from inside a character set.
   * @return {number} the code unit it denotes.
   */
  function decodeEscape(charsetPart) {
    var cc0 = charsetPart.charCodeAt(0);
    if (cc0 !== 92 /* \\ */) {
      return cc0;
    }
    var c1 = charsetPart.charAt(1);
    cc0 = escapeCharToCodeUnit[c1];
    if (cc0) {
      return cc0;
    } else if ('0' <= c1 && c1 <= '7') {
      return parseInt(charsetPart.substring(1), 8);
    } else if (c1 === 'u' || c1 === 'x') {
      return parseInt(charsetPart.substring(2), 16);
    } else {
      return charsetPart.charCodeAt(1);
    }
  }

  /**
   * @param {number} charCode a code unit.
   * @return {string} regex source that denotes charCode inside a character
   *     set.
   */
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    return (ch === '\\' || ch === '-' || ch === ']' || ch === '^')
        ? "\\" + ch : ch;
  }

  /**
   * Rewrites the digits of a numeric escape that is not a back-reference
   * so that it cannot be mistaken for one after groups are renumbered.
   * The longest legacy octal prefix becomes a {@code \xHH} escape, which is
   * valid outside character sets as well; any digits left over are literal
   * characters and are kept as they are. A leading 8 or 9 is an identity
   * escape, i.e. the digits themselves.
   *
   * @param {string} digits the decimal digits that followed the backslash.
   * @return {string} equivalent regex source without a numeric escape.
   */
  function encodeLegacyOctalEscape(digits) {
    var octal = /^(?:[0-3][0-7]{0,2}|[4-7][0-7]?)/.exec(digits);
    if (!octal) { return digits; }
    var hex = parseInt(octal[0], 8).toString(16);
    return '\\x' + (hex.length < 2 ? '0' : '') + hex
        + digits.substring(octal[0].length);
  }

  /**
   * @param {string} charSet source of a character set, brackets included.
   * @return {string} source of a character set that additionally contains
   *     the other-case counterpart of every latin letter in charSet.
   */
  function caseFoldCharset(charSet) {
    // An empty set such as [] has no parts; match() then returns null.
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g')) || [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';

    var out = ['['];
    if (inverse) { out.push('^'); }

    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      if (/\\[bdsw]/i.test(p)) {  // Don't muck with named groups.
        out.push(p);
      } else {
        var start = decodeEscape(p);
        var end;
        if (i + 2 < n && '-' === charsetParts[i + 1]) {
          end = decodeEscape(charsetParts[i + 2]);
          i += 2;
        } else {
          end = start;
        }
        ranges.push([start, end]);
        // If the range might intersect letters, then expand it.
        // This case handling is too simplistic.
        // It does not deal with non-latin case folding.
        // It works for latin source code identifiers though.
        if (!(end < 65 || start > 122)) {
          if (!(end < 65 || start > 90)) {
            ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
          }
          if (!(end < 97 || start > 122)) {
            ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
          }
        }
      }
    }

    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
    var consolidatedRanges = [];
    // Starts empty: undefined + 1 is NaN, so the first range is always
    // pushed rather than merged.
    var lastRange = [];
    for (var j = 0; j < ranges.length; ++j) {
      var range = ranges[j];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    for (var k = 0; k < consolidatedRanges.length; ++k) {
      var consolidated = consolidatedRanges[k];
      out.push(encodeEscape(consolidated[0]));
      if (consolidated[1] > consolidated[0]) {
        out.push('-');
        out.push(encodeEscape(consolidated[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  /**
   * @param {RegExp} regex one of the inputs.
   * @return {string} regex source rewritten as described on
   *     combinePrefixPatterns.
   */
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        var digits = p.substring(1);
        var decimalValue = +digits;
        if (decimalValue) {
          if (decimalValue <= groupIndex) {
            capturedGroups[decimalValue] = -1;
          } else {
            // Replace with an unambiguous escape sequence so that
            // an octal escape sequence does not turn into a backreference
            // to a capturing group from an earlier regex.
            parts[i] = encodeLegacyOctalEscape(digits);
          }
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (var g = 1; g < capturedGroups.length; ++g) {
      if (-1 === capturedGroups[g]) {
        capturedGroups[g] = ++capturedGroupIndex;
      }
    }
    for (var i2 = 0, groupIndex2 = 0; i2 < n; ++i2) {
      var p2 = parts[i2];
      if (p2 === '(') {
        ++groupIndex2;
        if (!capturedGroups[groupIndex2]) {
          parts[i2] = '(?:';
        }
      } else if ('\\' === p2.charAt(0)) {
        var backRef = +p2.substring(1);
        if (backRef && backRef <= groupIndex2) {
          parts[i2] = '\\' + capturedGroups[backRef];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (var a = 0; a < n; ++a) {
      if ('^' === parts[a] && '^' !== parts[a + 1]) { parts[a] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (var f = 0; f < n; ++f) {
        var part = parts[f];
        var ch0 = part.charAt(0);
        if (part.length >= 2 && ch0 === '[') {
          parts[f] = caseFoldCharset(part);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[f] = part.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var r = 0, nRegexs = regexs.length; r < nRegexs; ++r) {
    var input = regexs[r];
    if (input.global || input.multiline) { throw new Error('' + input); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(input) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}
| 482 | |
/**
 * Split markup into a string of source code and an array mapping ranges in
 * that string to the text nodes in which they appear.
 *
 * <p>
 * The HTML DOM structure:</p>
 * <pre>
 * (Element   "p"
 *   (Element "b"
 *     (Text  "print "))       ; #1
 *   (Text    "'Hello '")      ; #2
 *   (Element "br")            ; #3
 *   (Text    "  + 'World';")) ; #4
 * </pre>
 * <p>
 * corresponds to the HTML
 * {@code <p><b>print </b>'Hello '<br>  + 'World';</p>}.</p>
 *
 * <p>
 * It will produce the output:</p>
 * <pre>
 * {
 *   sourceCode: "print 'Hello '\n  + 'World';",
 *   //                     1          2
 *   //           012345678901234 5678901234567
 *   spans: [0, #1, 6, #2, 14, #3, 15, #4]
 * }
 * </pre>
 * <p>
 * where #1 is a reference to the {@code "print "} text node above, and so
 * on for the other text nodes.
 * </p>
 *
 * <p>
 * The {@code spans} array is an array of pairs.  Even elements are the start
 * indices of substrings, and odd elements are the text nodes (or BR and LI
 * elements) that contain the text for those substrings.
 * Substrings continue until the next index or the end of the source.
 * </p>
 *
 * <p>
 * Elements whose class list contains {@code nocode} are skipped together
 * with their descendants.  A single trailing newline is removed from
 * {@code sourceCode}; {@code spans} still lists the node that produced it.
 * </p>
 *
 * @param {Node} node an HTML DOM subtree containing source-code.
 * @param {boolean} isPreformatted true if white-space in text nodes should
 *    be considered significant.
 * @return {Object} source code and the text nodes in which they occur.
 */
function extractSourceSpans(node, isPreformatted) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;

  var chunks = [];
  var length = 0;  // total length of the text in chunks so far
  var spans = [];

  // Records that text, contributed by owner, starts at the current offset.
  function record(text, owner) {
    chunks.push(text);
    spans.push(length, owner);
    length += text.length;
  }

  function walk(node) {
    var type = node.nodeType;
    if (type === 1) {  // Element
      if (nocode.test(node.className)) { return; }
      for (var child = node.firstChild; child; child = child.nextSibling) {
        walk(child);
      }
      // BR and LI elements end a line; the newline is attributed to the
      // element itself and comes after any text inside it.
      var nodeName = node.nodeName.toLowerCase();
      if ('br' === nodeName || 'li' === nodeName) {
        record('\n', node);
      }
    } else if (type === 3 || type === 4) {  // Text or CDATA section
      var text = node.nodeValue;
      if (text.length) {
        if (!isPreformatted) {
          text = text.replace(/[ \t\r\n]+/g, ' ');
        } else {
          text = text.replace(/\r\n?/g, '\n');  // Normalize newlines.
        }
        // TODO: handle tabs here?
        record(text, node);
      }
    }
  }

  walk(node);

  return {
    sourceCode: chunks.join('').replace(/\n$/, ''),
    spans: spans
  };
}
| 573 | |
/**
 * Apply the given language handler to sourceCode and add the resulting
 * decorations to out.
 *
 * <p>Nothing happens when sourceCode is empty; the handler is not called.
 * Decorations are copied one at a time instead of through
 * {@code out.push.apply}, which throws a RangeError once the list is longer
 * than the engine's limit on call arguments.
 *
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 * @param {string} sourceCode the text to decorate.
 * @param {function (Object)} langHandler called with a job object
 *    {@code {sourceCode, basePos}}; it is expected to store its result in
 *    {@code job.decorations}.
 * @param {Array} out receives the handler's decorations, in order.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (!sourceCode) { return; }
  var job = {
    sourceCode: sourceCode,
    basePos: basePos
  };
  langHandler(job);
  var decorations = job.decorations;
  // A handler that produced nothing leaves out untouched.
  if (!decorations) { return; }
  for (var i = 0, n = decorations.length; i < n; ++i) {
    out.push(decorations[i]);
  }
}
| 589 | |
// Matches any string that contains a non-space character.
var notWs = /\S/;

/**
 * Returns the sole child element of the given element, provided that no
 * text node among the element's children contains anything but space
 * characters.  Returns undefined when there is no child element, when there
 * is more than one, or when a text node has non-space content.
 * <p>
 * This is meant to return the CODE element in {@code <pre><code ...>} when
 * there is a single child element that contains all the non-space textual
 * content, but not to return anything where there are multiple child elements
 * as in {@code <pre><code>...</code><code>...</code></pre>} or when there
 * is textual content.
 */
function childContentWrapper(element) {
  var soleChild;
  for (var child = element.firstChild; child; child = child.nextSibling) {
    switch (child.nodeType) {
      case 1:  // Element Node
        // A second element disqualifies the first.
        if (soleChild) { return undefined; }
        soleChild = child;
        break;
      case 3:  // Text Node
        if (notWs.test(child.nodeValue)) { return undefined; }
        break;
      default:  // other node kinds do not affect the outcome
        break;
    }
  }
  return soleChild;
}
| 615 | |
/**
 * Given triples of [style, pattern, context] returns a lexing function.
 * The lexing function interprets the patterns to find token boundaries and
 * stores on its job a decoration list of the form
 * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
 * where index_n is an index into the sourceCode, and style_n is a style
 * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
 * all characters in sourceCode[index_n-1:index_n].
 *
 * The stylePatterns is a list whose elements have the form
 * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
 *
 * Style is a style constant like PR_PLAIN, or can be a string of the
 * form 'lang-FOO', where FOO is a language extension describing the
 * language of the portion of the token in $1 after pattern executes.
 * E.g., if style is 'lang-lisp', and group 1 contains the text
 * '(hello (world))', then that portion of the token will be passed to the
 * registered lisp handler for formatting.
 * The text before and after group 1 will be restyled using this decorator
 * so decorators should take care that this doesn't result in infinite
 * recursion.  For example, the HTML lexer rule for SCRIPT elements looks
 * something like ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
 * '<script>foo()<\/script>', which would cause the current decorator to
 * be called with '<script>' which would not match the same rule since
 * group 1 must not be empty, so it would be instead styled as PR_TAG by
 * the generic tag rule.  The handler registered for the 'js' extension would
 * then be called with 'foo()', and finally, the current decorator would
 * be called with '<\/script>' which would not match the original rule and
 * so the generic tag rule would identify it as a tag.
 *
 * Pattern must only match prefixes, and if it matches a prefix, then that
 * match is considered a token with the same style.
 *
 * Context is applied to the last non-whitespace, non-comment token
 * recognized.
 *
 * Shortcut is an optional string of characters, any of which, if the first
 * character, guarantee that this pattern and only this pattern matches.
 *
 * @param {Array} shortcutStylePatterns patterns that always start with
 *   a known character.  Must have a shortcut string.
 * @param {Array} fallthroughStylePatterns patterns that will be tried in
 *   order if the shortcut ones fail.  May have shortcuts.
 *
 * @return {function (Object)} a
 *   function that takes source code and returns a list of decorations.
 */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps the first character of a token to the one rule that can match it.
  var shortcuts = {};

  // One global regex that splits source text into tokens.  It is the union
  // of every distinct rule pattern plus a catch-all for single characters,
  // so that tokenizing always makes progress.
  var tokenizer = (function () {
    var rules = shortcutStylePatterns.concat(fallthroughStylePatterns);
    var distinctRegexs = [];
    var seenRegexs = {};
    for (var r = 0, nRules = rules.length; r < nRules; ++r) {
      var rule = rules[r];
      var shortcutChars = rule[3];
      if (shortcutChars) {
        for (var c = shortcutChars.length - 1; c >= 0; --c) {
          shortcuts[shortcutChars.charAt(c)] = rule;
        }
      }
      var pattern = rule[1];
      var key = '' + pattern;
      if (!seenRegexs.hasOwnProperty(key)) {
        seenRegexs[key] = null;
        distinctRegexs.push(pattern);
      }
    }
    distinctRegexs.push(/[\0-\uffff]/);
    return combinePrefixPatterns(distinctRegexs);
  })();

  var fallthroughCount = fallthroughStylePatterns.length;

  /**
   * Lexes job.sourceCode and produces an output array job.decorations of
   * style classes preceded by the position at which they start in
   * job.sourceCode in order.
   *
   * @param {Object} job an object like <pre>{
   *    sourceCode: {string} sourceText plain text,
   *    basePos: {int} position of job.sourceCode in the larger chunk of
   *        sourceCode.
   * }</pre>
   */
  function decorate(job) {
    var sourceCode = job.sourceCode;
    var basePos = job.basePos;
    /** Even entries are positions in source in ascending order.  Odd entries
      * are style markers (e.g., PR_COMMENT) that run from that position until
      * the end.
      * @type {Array.<number|string>}
      */
    var decorations = [basePos, PR_PLAIN];
    var tokens = sourceCode.match(tokenizer) || [];
    // Remembers the style of tokens already seen; embedded tokens are never
    // cached because they need their match groups every time.
    var styleCache = {};
    var pos = 0;  // index into sourceCode

    for (var t = 0, nTokens = tokens.length; t < nTokens; ++t) {
      var token = tokens[t];
      var tokenStart = pos;
      pos += token.length;

      var style = styleCache[token];
      var match = void 0;
      var isEmbedded = false;

      if (typeof style !== 'string') {
        var rule = shortcuts[token.charAt(0)];
        if (rule) {
          match = token.match(rule[1]);
          style = rule[0];
        } else {
          for (var f = 0; f < fallthroughCount; ++f) {
            rule = fallthroughStylePatterns[f];
            match = token.match(rule[1]);
            if (match) {
              style = rule[0];
              break;
            }
          }

          if (!match) {  // make sure that we make progress
            style = PR_PLAIN;
          }
        }

        isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5);
        if (isEmbedded && !(match && typeof match[1] === 'string')) {
          // A 'lang-' rule without a usable group 1 is shown as plain
          // embedded source.
          isEmbedded = false;
          style = PR_SOURCE;
        }

        if (!isEmbedded) { styleCache[token] = style; }
      }

      if (!isEmbedded) {
        decorations.push(basePos + tokenStart, style);
        continue;
      }

      // Treat group 1 as an embedded block of source code.
      var embeddedSource = match[1];
      var embeddedSourceStart = token.indexOf(embeddedSource);
      var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
      if (match[2]) {
        // If embeddedSource can be blank, then it would match at the
        // beginning which would cause us to infinitely recurse on the
        // entire token, so we catch the right context in match[2].
        embeddedSourceEnd = token.length - match[2].length;
        embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
      }
      var lang = style.substring(5);
      // Decorate the left of the embedded source
      appendDecorations(
          basePos + tokenStart,
          token.substring(0, embeddedSourceStart),
          decorate, decorations);
      // Decorate the embedded source
      appendDecorations(
          basePos + tokenStart + embeddedSourceStart,
          embeddedSource,
          langHandlerForExtension(lang, embeddedSource),
          decorations);
      // Decorate the right of the embedded section
      appendDecorations(
          basePos + tokenStart + embeddedSourceEnd,
          token.substring(embeddedSourceEnd),
          decorate, decorations);
    }
    job.decorations = decorations;
  }

  return decorate;
}
| 789 | |
/**
 * Returns a function that produces a list of decorations from source text.
 *
 * This code treats ", ', and ` as string delimiters, and \ as a string
 * escape.  It does not recognize perl's qq() style strings.
 * It has no special handling for double delimiter escapes as in basic, or
 * the tripled delimiters used in python, but should work on those regardless
 * although in those cases a single string literal may be broken up into
 * multiple adjacent string literals.
 *
 * It recognizes C, C++, and shell style comments.
 *
 * @param {Object} options a set of optional parameters.  The keys read here
 *     are 'tripleQuotedStrings', 'multiLineStrings', 'verbatimStrings',
 *     'hashComments' (truthy; a value greater than 1 enables ### block
 *     comments when 'cStyleComments' is also set), 'cStyleComments',
 *     'regexLiterals' (truthy; a value greater than 1 allows regex literals
 *     to span lines), 'types' (a RegExp) and 'keywords' (a string of
 *     keywords separated by commas and/or white-space).
 * @return {function (Object)} a function that examines the source code
 *     in the input job and builds the decoration list.
 */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];
  if (options['tripleQuotedStrings']) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options['multiLineStrings']) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  if (options['verbatimStrings']) {
    // verbatim-string-literal production from the C# grammar.  See issue 93.
    fallthroughStylePatterns.push(
        [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
  }
  var hc = options['hashComments'];
  if (hc) {
    if (options['cStyleComments']) {
      if (hc > 1) {  // multiline hash comments
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
      } else {
        // Stop C preprocessor declarations at an unclosed open comment
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:(?:define|e(?:l|nd)if|else|error|ifn?def|include|line|pragma|undef|warning)\b|[^\r\n]*)/,
             null, '#']);
      }
      // #include <stdio.h>
      fallthroughStylePatterns.push(
          [PR_STRING,
           /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h(?:h|pp|\+\+)?|[a-z]\w*)>/,
           null]);
    } else {
      shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
    }
  }
  if (options['cStyleComments']) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  var regexLiterals = options['regexLiterals'];
  if (regexLiterals) {
    /**
     * Characters that may not appear in a regex literal: none when multiline
     * regex literals are allowed, otherwise the line terminators.
     * @const
     */
    var regexExcls = regexLiterals > 1
      ? ''  // Multiline regex literals
      : '\n\r';
    /**
     * @const
     */
    var regexAny = regexExcls ? '.' : '[\\S\\s]';
    /**
     * @const
     */
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '/(?=[^/*' + regexExcls + '])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C' + regexExcls + ']'
        // escape sequences (\x5C),
        +    '|\\x5C' + regexAny
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D' + regexExcls + ']'
        +             '|\\x5C' + regexAny + ')*(?:\\x5D|$))+'
        // finally closed by a /.
        + '/');
    fallthroughStylePatterns.push(
        ['lang-regex',
         RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
         ]);
  }

  var types = options['types'];
  if (types) {
    fallthroughStylePatterns.push([PR_TYPE, types]);
  }

  // Strip one leading and one trailing space, then turn the separator runs
  // into regex alternation.
  var keywords = ("" + options['keywords']).replace(/^ | $/g, '');
  if (keywords.length) {
    fallthroughStylePatterns.push(
        [PR_KEYWORD,
         new RegExp('^(?:' + keywords.replace(/[\s,]+/g, '|') + ')\\b'),
         null]);
  }

  shortcutStylePatterns.push([PR_PLAIN,       /^\s+/, null, ' \r\n\t\xA0']);

  var punctuation =
    // The Bash man page says

    // A word is a sequence of characters considered as a single
    // unit by GRUB. Words are separated by metacharacters,
    // which are the following plus space, tab, and newline: { }
    // | & $ ; < >
    // ...

    // A word beginning with # causes that word and all remaining
    // characters on that line to be ignored.

    // which means that only a '#' after /(?:^|[{}|&$;<>\s])/ starts a
    // comment but empirically
    // $ echo {#}
    // {#}
    // $ echo \$#
    // $#
    // $ echo }#
    // }#

    // so /(?:^|[|&;<>\s])/ is more appropriate.

    // http://gcc.gnu.org/onlinedocs/gcc-2.95.3/cpp_1.html#SEC3
    // suggests that this definition is compatible with a
    // default mode that tries to use a single token definition
    // to recognize both bash/python style comments and C
    // preprocessor directives.

    // This definition of punctuation does not include # in the list of
    // follow-on exclusions, so # will not be broken before if preceded
    // by a punctuation character.  We could try to exclude # after
    // [|&;<>] but that doesn't seem to cause many major problems.
    // If that does turn out to be a problem, we should change the below
    // when hc is truthy to include # in the run of punctuation characters
    // only when not following [|&;<>].
    '^.[^\\s\\w.$@\'"`/\\\\]*';
  if (regexLiterals) {
    // Do not let a punctuation run end right before (optional white-space
    // and) a slash.  The backslashes must be doubled: this is a string that
    // is compiled by RegExp below, and '\s' in a string literal is just 's'.
    punctuation += '(?!\\s*\\/)';
  }

  fallthroughStylePatterns.push(
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_LITERAL,     /^@[a-z_$][a-z_$@0-9]*/i, null],
      [PR_TYPE,        /^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*|\w+_t\b)/, null],
      [PR_PLAIN,       /^[a-z_$][a-z_$@0-9]*/i, null],
      [PR_LITERAL,
       new RegExp(
           '^(?:'
           // A hex number
           + '0x[a-f0-9]+'
           // or an octal or decimal number, including one that starts with
           // the decimal point such as .5
           + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d+)'
           // possibly in scientific notation
           + '(?:e[+\\-]?\\d+)?'
           + ')'
           // with an optional modifier like UL for unsigned long
           + '[a-z]*', 'i'),
       null, '0123456789'],
      // Don't treat escaped quotes in bash as starting strings.
      // See issue 144.
      [PR_PLAIN,       /^\\[\s\S]?/, null],
      [PR_PUNCTUATION, new RegExp(punctuation), null]);

  return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
}
| 971 | |
/**
 * Decorator built from ALL_KEYWORDS with hash comments, C-style comments,
 * multi-line strings and regex literals all enabled.
 */
var decorateSource = sourceDecorator({
      'keywords': ALL_KEYWORDS,
      'hashComments': true,
      'cStyleComments': true,
      'multiLineStrings': true,
      'regexLiterals': true
    });
| 979 | |
/**
 * Given a DOM subtree, wraps it in a list, and puts each line into its own
 * list item.
 *
 * @param {Node} node modified in place.  Its content is pulled into an
 *     HTMLOListElement, and each line is moved into a separate list item.
 *     This requires cloning elements, so the input might not have unique
 *     IDs after numbering.
 * @param {number|boolean=} opt_startLineNum when this is an integer it is set
 *     as the {@code value} attribute of the first list item, and it shifts
 *     the L0..L9 class cycle so that the first item gets class
 *     L((opt_startLineNum - 1) % 10).
 * @param {boolean} isPreformatted true iff white-space in text nodes should
 *     be treated as significant.
 */
function numberLines(node, opt_startLineNum, isPreformatted) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;
  var lineBreak = /\r\n?|\n/;

  var document = node.ownerDocument;

  var li = document.createElement('li');
  while (node.firstChild) {
    li.appendChild(node.firstChild);
  }
  // An array of lines.  We split below, so this is initialized to one
  // un-split line.
  var listItems = [li];

  function walk(node) {
    var type = node.nodeType;
    if (type == 1 && !nocode.test(node.className)) {  // Element
      // nodeName is upper-case for HTML elements in HTML documents and keeps
      // its source case in XML documents, so compare case-insensitively.
      if ('br' === node.nodeName.toLowerCase()) {
        breakAfter(node);
        // Discard the <BR> since it is now flush against a </LI>.
        if (node.parentNode) {
          node.parentNode.removeChild(node);
        }
      } else {
        for (var child = node.firstChild; child; child = child.nextSibling) {
          walk(child);
        }
      }
    } else if ((type == 3 || type == 4) && isPreformatted) {  // Text
      var text = node.nodeValue;
      var match = text.match(lineBreak);
      if (match) {
        // Keep the first line in this node and push the rest into a new
        // sibling, which ends up in the next list item and is split again
        // when that item is walked.
        var firstLine = text.substring(0, match.index);
        node.nodeValue = firstLine;
        var tail = text.substring(match.index + match[0].length);
        if (tail) {
          var parent = node.parentNode;
          parent.insertBefore(
            document.createTextNode(tail), node.nextSibling);
        }
        breakAfter(node);
        if (!firstLine) {
          // Don't leave blank text nodes in the DOM.
          node.parentNode.removeChild(node);
        }
      }
    }
  }

  // Split a line after the given node.
  function breakAfter(lineEndNode) {
    // If there's nothing to the right, then we can skip ending the line
    // here, and move root-wards since splitting just before an end-tag
    // would require us to create a bunch of empty copies.
    while (!lineEndNode.nextSibling) {
      lineEndNode = lineEndNode.parentNode;
      if (!lineEndNode) { return; }
    }

    function breakLeftOf(limit, copy) {
      // Clone shallowly if this node needs to be on both sides of the break.
      var rightSide = copy ? limit.cloneNode(false) : limit;
      var parent = limit.parentNode;
      if (parent) {
        // We clone the parent chain.
        // This helps us resurrect important styling elements that cross
        // lines.
        // E.g. in <i>Foo<br>Bar</i>
        // should be rewritten to <li><i>Foo</i></li><li><i>Bar</i></li>.
        var parentClone = breakLeftOf(parent, 1);
        // Move the clone and everything to the right of the original
        // onto the cloned parent.
        var next = limit.nextSibling;
        parentClone.appendChild(rightSide);
        for (var sibling = next; sibling; sibling = next) {
          next = sibling.nextSibling;
          parentClone.appendChild(sibling);
        }
      }
      return rightSide;
    }

    var copiedListItem = breakLeftOf(lineEndNode.nextSibling, 0);

    // Walk the parent chain until we reach an unattached LI.
    for (var parent;
         // Check nodeType since IE invents document fragments.
         (parent = copiedListItem.parentNode) && parent.nodeType === 1;) {
      copiedListItem = parent;
    }
    // Put it on the list of lines for later processing.
    listItems.push(copiedListItem);
  }

  // Split lines while there are lines left to split.
  for (var i = 0;  // Number of lines that have been split so far.
       i < listItems.length;  // length updated by breakAfter calls.
       ++i) {
    walk(listItems[i]);
  }

  // Make sure numeric indices show correctly.
  if (opt_startLineNum === (opt_startLineNum|0)) {
    listItems[0].setAttribute('value', opt_startLineNum);
  }

  var ol = document.createElement('ol');
  ol.className = 'linenums';
  var offset = Math.max(0, ((opt_startLineNum - 1 /* zero index */)) | 0) || 0;
  for (var i = 0, n = listItems.length; i < n; ++i) {
    li = listItems[i];
    // Stick a class on the LIs so that stylesheets can
    // color odd/even rows, or any other row pattern that
    // is co-prime with 10.
    li.className = 'L' + ((i + offset) % 10);
    if (!li.firstChild) {
      // Give empty lines a non-breaking space as content.
      li.appendChild(document.createTextNode('\xA0'));
    }
    ol.appendChild(li);
  }

  node.appendChild(ol);
}
/**
 * Breaks {@code job.sourceCode} around style boundaries in
 * {@code job.decorations} and modifies {@code job.sourceNode} in place.
 * {@code job.decorations} is also compacted in place: zero-length entries
 * are dropped and adjacent entries with the same style are merged.
 * @param {Object} job like <pre>{
 *    sourceCode: {string} source as plain text,
 *    sourceNode: {HTMLElement} the element containing the source,
 *    spans: {Array.<number|Node>} alternating span start indices into source
 *       and the text node or element (e.g. {@code <BR>}) corresponding to
 *       that span.
 *    decorations: {Array.<number|string>} an array of style classes preceded
 *       by the position at which they start in job.sourceCode in order
 * }</pre>
 * @private
 */
function recombineTagsAndDecorations(job) {
  var isIE8OrEarlier = /\bMSIE\s(\d+)/.exec(navigator.userAgent);
  isIE8OrEarlier = isIE8OrEarlier && +isIE8OrEarlier[1] <= 8;
  var newlineRe = /\n/g;

  var source = job.sourceCode;
  var sourceLength = source.length;
  // Index into source after the last code-unit recombined.
  var sourceIndex = 0;

  var spans = job.spans;
  var nSpans = spans.length;
  // Index into spans after the last span which ends at or before sourceIndex.
  var spanIndex = 0;

  var decorations = job.decorations;
  var nDecorations = decorations.length;
  // Index into decorations after the last decoration which ends at or before
  // sourceIndex.
  var decorationIndex = 0;

  // Remove all zero-length decorations.
  // The sentinel lets the last real decoration be compared against the end
  // of the source; it is truncated away again below.
  decorations[nDecorations] = sourceLength;
  var decPos, i;
  for (i = decPos = 0; i < nDecorations;) {
    if (decorations[i] !== decorations[i + 2]) {
      decorations[decPos++] = decorations[i++];
      decorations[decPos++] = decorations[i++];
    } else {
      i += 2;
    }
  }
  nDecorations = decPos;

  // Simplify decorations.
  for (i = decPos = 0; i < nDecorations;) {
    var startPos = decorations[i];
    // Conflate all adjacent decorations that use the same style.
    var startDec = decorations[i + 1];
    var runEnd = i + 2;
    while (runEnd + 2 <= nDecorations && decorations[runEnd + 1] === startDec) {
      runEnd += 2;
    }
    decorations[decPos++] = startPos;
    decorations[decPos++] = startDec;
    i = runEnd;
  }

  nDecorations = decorations.length = decPos;

  // Hide the node while it is rewritten and restore its display afterwards,
  // even if the rewrite throws.
  var sourceNode = job.sourceNode;
  var oldDisplay;
  if (sourceNode) {
    oldDisplay = sourceNode.style.display;
    sourceNode.style.display = 'none';
  }
  try {
    while (spanIndex < nSpans) {
      var spanEnd = spans[spanIndex + 2] || sourceLength;

      var decEnd = decorations[decorationIndex + 2] || sourceLength;

      var end = Math.min(spanEnd, decEnd);

      var textNode = spans[spanIndex + 1];
      var styledText;
      if (textNode.nodeType !== 1  // Don't muck with <BR>s or <LI>s
          // Don't introduce spans around empty text nodes.
          && (styledText = source.substring(sourceIndex, end))) {
        // This may seem bizarre, and it is.  Emitting LF on IE causes the
        // code to display with spaces instead of line breaks.
        // Emitting Windows standard issue linebreaks (CRLF) causes a blank
        // space to appear at the beginning of every line but the first.
        // Emitting an old Mac OS 9 line separator makes everything spiffy.
        if (isIE8OrEarlier) {
          styledText = styledText.replace(newlineRe, '\r');
        }
        textNode.nodeValue = styledText;
        var document = textNode.ownerDocument;
        var span = document.createElement('span');
        span.className = decorations[decorationIndex + 1];
        var parentNode = textNode.parentNode;
        parentNode.replaceChild(span, textNode);
        span.appendChild(textNode);
        // Split off a text node only when part of this span is still
        // undecorated.  (Testing sourceIndex here would always be true,
        // because styledText is non-empty, and would leave an empty text
        // node behind every span that ends on a span boundary.)
        if (end < spanEnd) {
          spans[spanIndex + 1] = textNode
              // TODO: Possibly optimize by using '' if there's no flicker.
              = document.createTextNode(source.substring(end, spanEnd));
          parentNode.insertBefore(textNode, span.nextSibling);
        }
      }

      sourceIndex = end;

      if (sourceIndex >= spanEnd) {
        spanIndex += 2;
      }
      if (sourceIndex >= decEnd) {
        decorationIndex += 2;
      }
    }
  } finally {
    if (sourceNode) {
      sourceNode.style.display = oldDisplay;
    }
  }
}
| 1236 | |
/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};
/** Register a language handler for the given file extensions.
  * The first handler registered for an extension wins; later registrations
  * for the same extension are ignored (with a console warning when
  * {@code win.console} is available).
  * @param {function (Object)} handler a function from source code to a list
  *      of decorations.  Takes a single argument job which describes the
  *      state of the computation.   The single parameter has the form
  *      {@code {
  *        sourceCode: {string} as plain text.
  *        decorations: {Array.<number|string>} an array of style classes
  *                     preceded by the position at which they start in
  *                     job.sourceCode in order.
  *                     The language handler should assign this field.
  *        basePos: {int} the position of source in the larger source chunk.
  *                 All positions in the output decorations array are relative
  *                 to the larger source chunk.
  *      } }
  * @param {Array.<string>} fileExtensions
  */
function registerLangHandler(handler, fileExtensions) {
  for (var i = fileExtensions.length; --i >= 0;) {
    var ext = fileExtensions[i];
    // Call hasOwnProperty through Object.prototype so that an extension
    // literally named "hasOwnProperty" cannot shadow the check.
    if (!Object.prototype.hasOwnProperty.call(langHandlerRegistry, ext)) {
      langHandlerRegistry[ext] = handler;
    } else if (win['console']) {
      console['warn']('cannot override language handler %s', ext);
    }
  }
}
/**
 * Looks up the handler registered for a language extension.
 * @param {string} extension the extension to look up; may be missing or
 *     unregistered.
 * @param {string} source the plain text to be decorated, used to choose a
 *     default when extension has no registered handler.
 * @return {function (Object)} the handler registered for extension, else the
 *     one registered as 'default-markup' or 'default-code'.
 */
function langHandlerForExtension(extension, source) {
  if (!(extension
        && Object.prototype.hasOwnProperty.call(
            langHandlerRegistry, extension))) {
    // Treat it as markup if the first non whitespace character is a <.
    extension = /^\s*</.test(source)
        ? 'default-markup'
        : 'default-code';
  }
  return langHandlerRegistry[extension];
}
// Default handler for source that is not recognized as markup.
registerLangHandler(decorateSource, ['default-code']);
// Markup handler: splits a document into text, declarations, comments,
// embedded-language sections and tags.
registerLangHandler(
    createSimpleLexer(
        [],
        [
         [PR_PLAIN,       /^[^<?]+/],
         [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
         [PR_COMMENT,     /^<\!--[\s\S]*?(?:-\->|$)/],
         // Unescaped content in an unknown language
         ['lang-',        /^<\?([\s\S]+?)(?:\?>|$)/],
         ['lang-',        /^<%([\s\S]+?)(?:%>|$)/],
         [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
         ['lang-',        /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i],
         // Unescaped content in javascript.  (Or possibly vbscript).
         ['lang-js',      /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i],
         // Contains unescaped stylesheet content
         ['lang-css',     /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
         ['lang-in.tag',  /^(<\/?[a-z][^<>]*>)/i]
        ]),
    ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);
// Handler for the inside of a single tag: names, attributes and values.
registerLangHandler(
    createSimpleLexer(
        [
         [PR_PLAIN,        /^[\s]+/, null, ' \t\r\n'],
         [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
         ],
        [
         [PR_TAG,          /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
         [PR_ATTRIB_NAME,  /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
         ['lang-uq.val',   /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
         [PR_PUNCTUATION,  /^[=<>\/]+/],
         ['lang-js',       /^on\w+\s*=\s*\"([^\"]+)\"/i],
         ['lang-js',       /^on\w+\s*=\s*\'([^\']+)\'/i],
         ['lang-js',       /^on\w+\s*=\s*([^\"\'>\s]+)/i],
         ['lang-css',      /^style\s*=\s*\"([^\"]+)\"/i],
         ['lang-css',      /^style\s*=\s*\'([^\']+)\'/i],
         ['lang-css',      /^style\s*=\s*([^\"\'>\s]+)/i]
         ]),
    ['in.tag']);
// Handler for unquoted attribute values: the whole text is one value.
registerLangHandler(
    createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']);
// Handlers built by sourceDecorator, one per language family.  The option
// keys are matched literally by sourceDecorator, so their spelling (in
// particular the capital L in 'multiLineStrings') is significant.
registerLangHandler(sourceDecorator({
        'keywords': CPP_KEYWORDS,
        'hashComments': true,
        'cStyleComments': true,
        'types': C_TYPES
      }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);
registerLangHandler(sourceDecorator({
        'keywords': 'null,true,false'
      }), ['json']);
registerLangHandler(sourceDecorator({
        'keywords': CSHARP_KEYWORDS,
        'hashComments': true,
        'cStyleComments': true,
        'verbatimStrings': true,
        'types': C_TYPES
      }), ['cs']);
registerLangHandler(sourceDecorator({
        'keywords': JAVA_KEYWORDS,
        'cStyleComments': true
      }), ['java']);
registerLangHandler(sourceDecorator({
        'keywords': SH_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true
      }), ['bash', 'bsh', 'csh', 'sh']);
registerLangHandler(sourceDecorator({
        'keywords': PYTHON_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'tripleQuotedStrings': true
      }), ['cv', 'py', 'python']);
registerLangHandler(sourceDecorator({
        'keywords': PERL_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'regexLiterals': 2  // multiline regex literals
      }), ['perl', 'pl', 'pm']);
registerLangHandler(sourceDecorator({
        'keywords': RUBY_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'regexLiterals': true
      }), ['rb', 'ruby']);
registerLangHandler(sourceDecorator({
        'keywords': JSCRIPT_KEYWORDS,
        'cStyleComments': true,
        'regexLiterals': true
      }), ['javascript', 'js']);
registerLangHandler(sourceDecorator({
        'keywords': COFFEE_KEYWORDS,
        'hashComments': 3,  // ### style block comments
        'cStyleComments': true,
        'multiLineStrings': true,
        'tripleQuotedStrings': true,
        'regexLiterals': true
      }), ['coffee']);
registerLangHandler(sourceDecorator({
        'keywords': RUST_KEYWORDS,
        'cStyleComments': true,
        'multiLineStrings': true
      }), ['rc', 'rs', 'rust']);
// Embedded regex literals are styled as a single string.
registerLangHandler(
    createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
| 1379 | |
/**
 * Runs one pretty-printing job: extracts plain text from
 * {@code job.sourceNode}, lets the language handler for
 * {@code job.langExtension} compute decorations, and writes them back into
 * the node.  Sets {@code sourceCode}, {@code spans} and {@code basePos} on
 * the job.  Any exception is caught and logged to the console when
 * {@code win.console} is available, so this function does not throw.
 * @param {Object} job reads {@code langExtension}, {@code sourceNode} and
 *     {@code pre}.
 */
function applyDecorator(job) {
  var opt_langExtension = job.langExtension;

  try {
    // Extract tags, and convert the source code to plain text.
    var sourceAndSpans = extractSourceSpans(job.sourceNode, job.pre);
    /** Plain text. @type {string} */
    var source = sourceAndSpans.sourceCode;
    job.sourceCode = source;
    job.spans = sourceAndSpans.spans;
    job.basePos = 0;

    // Apply the appropriate language handler
    langHandlerForExtension(opt_langExtension, source)(job);

    // Integrate the decorations and tags back into the source code,
    // modifying the sourceNode in place.
    recombineTagsAndDecorations(job);
  } catch (e) {
    if (win['console']) {
      console['log'](e && e['stack'] || e);
    }
  }
}
| 1404 | |
/**
 * Pretty print a chunk of code.
 * @param sourceCodeHtml {string} The HTML to pretty print.
 * @param opt_langExtension {string} The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param opt_numberLines {number|boolean} True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} the innerHTML of the temporary {@code <pre>} element
 *     after it has been decorated.
 */
function $prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
  var container = document.createElement('div');
  // This could cause images to load and onload listeners to fire.
  // E.g. <img onerror="alert(1337)" src="nosuchimage.png">.
  // We assume that the inner HTML is from a trusted source.
  // The pre-tag is required for IE8 which strips newlines from innerHTML
  // when it is injected into a <pre> tag.
  // http://stackoverflow.com/questions/451486/pre-tag-loses-line-breaks-when-setting-innerhtml-in-ie
  // http://stackoverflow.com/questions/195363/inserting-a-newline-into-a-pre-tag-ie-javascript
  container.innerHTML = '<pre>' + sourceCodeHtml + '</pre>';
  container = container.firstChild;
  if (opt_numberLines) {
    numberLines(container, opt_numberLines, true);
  }

  var job = {
    langExtension: opt_langExtension,
    numberLines: opt_numberLines,
    sourceNode: container,
    pre: 1
  };
  applyDecorator(job);
  return container.innerHTML;
}
| 1437 | |
| 1438 /** | |
| 1439 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with | |
| 1440 * {@code class=prettyprint} and prettify them. | |
| 1441 * | |
| 1442 * @param {Function} opt_whenDone called when prettifying is done. | |
| 1443 * @param {HTMLElement|HTMLDocument} opt_root an element or document | |
| 1444 * containing all the elements to pretty print. | |
| 1445 * Defaults to {@code document.body}. | |
| 1446 */ | |
| 1447 function $prettyPrint(opt_whenDone, opt_root) { | |
| 1448 var root = opt_root || document.body; | |
| 1449 var doc = root.ownerDocument || document; | |
| 1450 function byTagName(tn) { return root.getElementsByTagName(tn); } | |
| 1451 // fetch a list of nodes to rewrite | |
| 1452 var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')]; | |
| 1453 var elements = []; | |
| 1454 for (var i = 0; i < codeSegments.length; ++i) { | |
| 1455 for (var j = 0, n = codeSegments[i].length; j < n; ++j) { | |
| 1456 elements.push(codeSegments[i][j]); | |
| 1457 } | |
| 1458 } | |
| 1459 codeSegments = null; | |
| 1460 | |
| 1461 var clock = Date; | |
| 1462 if (!clock['now']) { | |
| 1463 clock = { 'now': function () { return +(new Date); } }; | |
| 1464 } | |
| 1465 | |
| 1466 // The loop is broken into a series of continuations to make sure that we | |
| 1467 // don't make the browser unresponsive when rewriting a large page. | |
| 1468 var k = 0; | |
| 1469 var prettyPrintingJob; | |
| 1470 | |
| 1471 var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/; | |
| 1472 var prettyPrintRe = /\bprettyprint\b/; | |
| 1473 var prettyPrintedRe = /\bprettyprinted\b/; | |
| 1474 var preformattedTagNameRe = /pre|xmp/i; | |
| 1475 var codeRe = /^code$/i; | |
| 1476 var preCodeXmpRe = /^(?:pre|code|xmp)$/i; | |
| 1477 var EMPTY = {}; | |
| 1478 | |
/**
 * Pretty prints the queued elements, resuming at index {@code k}.
 *
 * When {@code win['PR_SHOULD_USE_CONTINUATION']} is truthy, one invocation
 * stops picking up new elements about 250ms after it started and reschedules
 * itself via setTimeout; otherwise it keeps going until every element has
 * been visited.  Once the queue is exhausted, {@code opt_whenDone} is
 * invoked if it is a function.
 */
function doWork() {
  // Only time-slice when the page opted in; otherwise never yield.
  var deadline = Infinity;
  if (win['PR_SHOULD_USE_CONTINUATION']) {
    deadline = clock['now']() + 250 /* ms */;
  }

  for (; k < elements.length && clock['now']() < deadline; k++) {
    var target = elements[k];

    // Scan backwards over whitespace-only text nodes looking for a directive
    // such as <?prettify lang=... linenums=...?>.  HTML 5 parses <?foo?> into
    // a comment node (8), like <!--?foo?-->, whereas XML yields a processing
    // instruction (7).  Attribute values are matched unquoted.
    var directive = EMPTY;
    var sibling = target.previousSibling;
    while (sibling) {
      var siblingType = sibling.nodeType;
      var piText = (siblingType === 7 || siblingType === 8)
          ? sibling.nodeValue
          : false;
      if (piText) {
        // A non-empty comment or PI ends the scan whether or not it is ours.
        if (/^\??prettify\b/.test(piText)) {
          directive = {};
          piText.replace(
              /\b(\w+)=([\w:.%+-]+)/g,
              function (_, key, val) { directive[key] = val; });
        }
        break;
      }
      // Anything other than a whitespace-only text node (3) ends the scan.
      if (siblingType !== 3 || /\S/.test(sibling.nodeValue)) { break; }
      sibling = sibling.previousSibling;
    }

    var classes = target.className;
    // Needs either a directive or the prettyprint class to be eligible.
    if (directive === EMPTY && !prettyPrintRe.test(classes)) { continue; }
    // Already handled by an earlier call; repeated calls only pick up
    // elements added to the page since then.
    if (prettyPrintedRe.test(classes)) { continue; }

    // Skip anything that lives inside another prettyprint container.
    var isNested = false;
    for (var ancestor = target.parentNode; ancestor;
         ancestor = ancestor.parentNode) {
      if (!preCodeXmpRe.test(ancestor.tagName)) { continue; }
      var ancestorClasses = ancestor.className;
      if (ancestorClasses && prettyPrintRe.test(ancestorClasses)) {
        isNested = true;
        break;
      }
    }
    if (isNested) { continue; }

    // Mark as done up front so that a failure below is never retried.
    target.className += ' prettyprinted';

    // Resolve the language extension.  The directive wins; otherwise look
    // for a class such as
    //   <pre class="prettyprint lang-cpp">
    // where "cpp" names a handler passed to PR.registerLangHandler.  The
    // HTML5-recommended "language-" prefix is accepted as well, see
    // http://dev.w3.org/html5/spec-author-view/the-code-element.html
    var lang = directive['lang'];
    if (!lang) {
      var langMatch = classes.match(langExtensionRe);
      if (!langMatch) {
        // Support <pre class="prettyprint"><code class="language-c">
        var codeChild = childContentWrapper(target);
        if (codeChild && codeRe.test(codeChild.tagName)) {
          langMatch = codeChild.className.match(langExtensionRe);
        }
      }
      lang = langMatch ? langMatch[1] : langMatch;
    }

    // Decide whether whitespace in the element is significant.
    var isPre;
    if (preformattedTagNameRe.test(target.tagName)) {
      isPre = 1;
    } else {
      var ownStyle = target['currentStyle'];
      var view = doc.defaultView;
      var whiteSpace;
      if (ownStyle) {
        whiteSpace = ownStyle['whiteSpace'];
      } else if (view && view.getComputedStyle) {
        whiteSpace = view.getComputedStyle(target, null)
            .getPropertyValue('white-space');
      } else {
        whiteSpace = 0;
      }
      // Any white-space value that starts with "pre" counts as preformatted.
      isPre = whiteSpace && 'pre' === whiteSpace.substring(0, 3);
    }

    // Line numbering: the directive may say linenums=true or give a number;
    // failing that, look for a class like linenums or linenums:<n> where <n>
    // is the 1-indexed number of the first line.
    var requested = directive['linenums'];
    var lineNums = requested === 'true' || +requested;
    if (!lineNums) {
      var lineNumsMatch = classes.match(/\blinenums\b(?::(\d+))?/);
      if (!lineNumsMatch) {
        lineNums = false;
      } else if (lineNumsMatch[1] && lineNumsMatch[1].length) {
        lineNums = +lineNumsMatch[1];
      } else {
        lineNums = true;
      }
    }
    if (lineNums) { numberLines(target, lineNums, isPre); }

    // do the pretty printing
    prettyPrintingJob = {
      langExtension: lang,
      sourceNode: target,
      numberLines: lineNums,
      pre: isPre
    };
    applyDecorator(prettyPrintingJob);
  }

  if (k < elements.length) {
    // Ran out of time: finish up in a continuation.
    setTimeout(doWork, 250);
  } else if ('function' === typeof opt_whenDone) {
    opt_whenDone();
  }
}
| 1603 | |
| 1604 doWork(); | |
| 1605 } | |
| 1606 | |
// Publish the two entry points.  When running in the global scope they are
// written onto win; otherwise prettyPrintOne is bound to the enclosing
// variable instead.  The prettyPrint variable is assigned in both cases.
if (IN_GLOBAL_SCOPE) {
  win['prettyPrintOne'] = $prettyPrintOne;
  win['prettyPrint'] = $prettyPrint;
} else {
  prettyPrintOne = $prettyPrintOne;
}
prettyPrint = $prettyPrint;

/**
 * Namespace object, also exposed as win['PR'], bundling the lexer factory,
 * the language-handler registry, the token style constants and the two
 * pretty-printing entry points.
 * @type {Object}
 */
var PR = win['PR'] = {
  'createSimpleLexer': createSimpleLexer,
  'registerLangHandler': registerLangHandler,
  'sourceDecorator': sourceDecorator,
  'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
  'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
  'PR_COMMENT': PR_COMMENT,
  'PR_DECLARATION': PR_DECLARATION,
  'PR_KEYWORD': PR_KEYWORD,
  'PR_LITERAL': PR_LITERAL,
  'PR_NOCODE': PR_NOCODE,
  'PR_PLAIN': PR_PLAIN,
  'PR_PUNCTUATION': PR_PUNCTUATION,
  'PR_SOURCE': PR_SOURCE,
  'PR_STRING': PR_STRING,
  'PR_TAG': PR_TAG,
  'PR_TYPE': PR_TYPE,
  'prettyPrintOne': $prettyPrintOne,
  'prettyPrint': $prettyPrint
};
| 1637 | |
// Register PR as an Asynchronous Module Definition (AMD) module when an AMD
// loader is present.  Per https://github.com/amdjs/amdjs-api/wiki/AMD, a
// conforming global define function SHOULD carry a property called "amd"
// whose value is an object; checking for it avoids calling some unrelated
// define() that other JavaScript code on the page may have declared.
if ("function" === typeof define && define['amd']) {
  define(
      "google-code-prettify",
      [],
      function () {
        return PR;
      });
}
| 1655 })(); | |
| OLD | NEW |