OLD | NEW |
| (Empty) |
1 // Copyright (C) 2013 Google Inc. | |
2 // | |
3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
4 // you may not use this file except in compliance with the License. | |
5 // You may obtain a copy of the License at | |
6 // | |
7 // http://www.apache.org/licenses/LICENSE-2.0 | |
8 // | |
9 // Unless required by applicable law or agreed to in writing, software | |
10 // distributed under the License is distributed on an "AS IS" BASIS, | |
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 // See the License for the specific language governing permissions and | |
13 // limitations under the License. | |
14 | |
15 | |
16 // Looks at query parameters to decide which language handlers and style-sheets | |
17 // to load. | |
18 | |
19 // Query Parameter Format Effect Default | |
20 // +------------------+---------------+------------------------------+--------+ | |
21 // | autorun= | true | false | If true then prettyPrint() | "true" | | |
22 // | | | is called on page load. | | | |
23 // +------------------+---------------+------------------------------+--------+ | |
24 // | lang= | language name | Loads the language handler | Can | | |
25 // | | | named "lang-<NAME>.js". | appear | | |
26 // | | | See available handlers at | many | | |
27 // | | | http://code.google.com/p/ | times. | | |
28 // | | | google-code-prettify/source/ | | | |
29 // | | | browse/trunk/src | | | |
30 // +------------------+---------------+------------------------------+--------+ | |
31 // | skin= | skin name | Loads the skin stylesheet | none. | | |
32 // | | | named "<NAME>.css". | | | |
33 // | | | http://code.google.com/p/ | | | |
34 // | | | google-code-prettify/source/ | | | |
35 // | | | browse/trunk/styles | | | |
36 // +------------------+---------------+------------------------------+--------+ | |
37 // | callback= | JS identifier | When "prettyPrint" finishes | none | | |
38 // | | | window.exports[js_ident] is | | | |
39 // | | | called. | | | |
40 // | | | The callback must be under | | | |
41 // | | | exports to reduce the risk | | | |
42 // | | | of XSS via query parameter | | | |
43 // | | | injection. | | | |
44 // +------------------+---------------+------------------------------+--------+ | |
45 | |
46 // Examples | |
47 // .../prettify.js?lang=css&skin=sunburst | |
48 // 1. Loads the CSS language handler which can be used to prettify CSS | |
49 // stylesheets, HTML <style> element bodies and style="..." attributes | |
50 // values. | |
51 // 2. Loads the sunburst.css stylesheet instead of the default prettify.css | |
52 // stylesheet. | |
53 // A gallery of stylesheets is available at | |
54 // https://google-code-prettify.googlecode.com/svn/trunk/styles/index.html | |
55 // 3. Since autorun=false is not specified, calls prettyPrint() on page load. | |
56 | |
57 | |
/**
 * Build-time boolean flag (declared with the Closure Compiler `@define`
 * annotation), off by default.
 * NOTE(review): the code that reads this flag is outside the visible part of
 * the file; presumably it decides whether the prettify API is also published
 * in the global scope -- confirm against the consumer before relying on that.
 * @define {boolean}
 */
var IN_GLOBAL_SCOPE = false;
60 | |
61 (function () { | |
62 "use strict"; | |
63 | |
// Local aliases for the browser globals and the DOM anchor points that the
// loader uses.
var win = window,
    doc = document,
    setTimeout = win.setTimeout,
    root = doc.documentElement,
    head = doc['head'];
// Fall back to the first <head> element, and finally to the root element, so
// that there is always a node to attach <link> and <script> elements to.
if (!head) {
  head = doc.getElementsByTagName("head")[0] || root;
}
69 | |
70 // From http://javascript.nwbox.com/ContentLoaded/contentloaded.js | |
71 // Author: Diego Perini (diego.perini at gmail.com) | |
72 // Summary: cross-browser wrapper for DOMContentLoaded | |
73 // Updated: 20101020 | |
74 // License: MIT | |
75 // Version: 1.2 | |
/**
 * Invokes {@code callback} once, with {@code win} as {@code this}, when the
 * document is ready.
 *
 * The single argument passed to the callback names what triggered it:
 * 'lazy' when the document was already complete at call time, the event type
 * ('DOMContentLoaded', 'readystatechange' or 'load') when an event fired, or
 * 'poll' when the doScroll polling loop succeeded first.
 *
 * @param {Function} callback called at most once per contentLoaded call.
 */
function contentLoaded(callback) {
  // Pick the W3C or the legacy attachEvent flavour of the event API.
  var hasW3cEvents = !!doc['addEventListener'];
  var attach = hasW3cEvents ? 'addEventListener' : 'attachEvent';
  var detach = hasW3cEvents ? 'removeEventListener' : 'detachEvent';
  var prefix = hasW3cEvents ? '' : 'on';
  var fired = false;

  // Shared handler for all three events and for the polling loop, which
  // passes the string 'poll' instead of an event object.
  function onReady(evt) {
    var kind = evt.type;
    if (kind == 'readystatechange' && doc.readyState != 'complete') {
      return;
    }
    var target = kind == 'load' ? win : doc;
    target[detach](prefix + kind, onReady, false);
    if (fired) { return; }
    fired = true;
    callback.call(win, kind || evt);
  }

  // doScroll throws until the document can be scrolled; retry every 50ms.
  function pollDoScroll() {
    try {
      root.doScroll('left');
    } catch (notReadyYet) {
      setTimeout(pollDoScroll, 50);
      return;
    }
    onReady('poll');
  }

  if (doc.readyState == 'complete') {
    callback.call(win, 'lazy');
    return;
  }

  if (doc.createEventObject && root.doScroll) {
    // Only poll when not inside a frame; reading frameElement may throw.
    var isTopLevel = true;
    try { isTopLevel = !win.frameElement; } catch (accessError) { }
    if (isTopLevel) { pollDoScroll(); }
  }
  doc[attach](prefix + 'DOMContentLoaded', onReady, false);
  doc[attach](prefix + 'readystatechange', onReady, false);
  win[attach](prefix + 'load', onReady, false);
}
113 | |
114 // Given a list of URLs to stylesheets, loads the first that loads without | |
115 // triggering an error event. | |
/**
 * Appends a <link rel="stylesheet"> for the first URL in {@code stylesheets}
 * and, whenever a link reports an error, moves on to the next URL.
 * Does nothing for an empty list.
 *
 * @param {Array.<string>} stylesheets candidate URLs in order of preference.
 */
function loadStylesheetsFallingBack(stylesheets) {
  var count = stylesheets.length;

  var tryStylesheet = function (index) {
    if (index !== count) {
      var el = doc.createElement('link');
      el.rel = 'stylesheet';
      el.type = 'text/css';
      // The last candidate has nothing to fall back to, so it gets no handler.
      if (index + 1 < count) {
        // http://pieisgood.org/test/script-link-events/ indicates that many
        // versions of IE do not support onerror on <link>s, though
        // http://msdn.microsoft.com/en-us/library/ie/ms535848(v=vs.85).aspx
        // indicates that recent IEs do support error.
        var tryNext = function () { tryStylesheet(index + 1); };
        el.onerror = tryNext;
        el.error = tryNext;
      }
      el.href = stylesheets[index];
      head.appendChild(el);
    }
  };

  tryStylesheet(0);
}
135 | |
// Query string ("?a=b&c=d", or '' when absent) of the <script> element that
// loaded this file.
var scriptQuery = '';
// Matches a URL whose path ends in "/run_prettify.js" and captures its query.
var loaderUrlPattern = /^[^?#]*\/run_prettify\.js(\?[^#]*)?(?:#.*)?$/;
// Scan from the last <script> backwards rather than just taking the last one,
// because deferred and async scripts run out of order.
// If the script is loaded twice, the parameter sets are therefore consumed in
// reverse order.
var scripts = doc.scripts;
var i = scripts.length;
var script, match;
while (--i >= 0) {
  script = scripts[i];
  match = script.src.match(loaderUrlPattern);
  if (!match) { continue; }
  scriptQuery = match[1] || '';
  // Detach the matched node so that a second copy of this loader finds a
  // different <script>: multiple runs then each see one parameter set, even
  // though they see them in reverse order.
  script.parentNode.removeChild(script);
  break;
}
154 | |
// Pull parameters into local variables.
//   autorun   - false when the value starts with "0", "f" or "n" (any case),
//               true otherwise; defaults to true.
//   langs     - every lang= value, in order of appearance.
//   skins     - every skin= value, in order of appearance.
//   callbacks - every callback= value, in order of appearance.
var autorun = true;
var langs = [];
var skins = [];
var callbacks = [];
// String.replace is used purely to iterate over the name=value pairs; its
// result is discarded.
scriptQuery.replace(
    /[?&]([^&=]+)=([^&]+)/g,
    function (_, name, value) {
      // decodeURIComponent throws URIError on a malformed percent-escape
      // (e.g. "lang=%zz").  Skip just that parameter instead of letting the
      // exception abort the whole loader.
      try {
        value = decodeURIComponent(value);
        name = decodeURIComponent(name);
      } catch (malformedEscape) {
        return;
      }
      if (name == 'autorun') { autorun = !/^[0fn]/i.test(value); } else
      if (name == 'lang') { langs.push(value); } else
      if (name == 'skin') { skins.push(value); } else
      if (name == 'callback') { callbacks.push(value); }
    });
170 | |
171 // Use https to avoid mixed content warnings in client pages and to | |
172 // prevent a MITM from rewriting prettify mid-flight. | |
173 // This only works if this script is loaded via https : something | |
174 // over which we exercise no control. | |
// Base URL under which the language handlers and stylesheets are fetched.
var LOADER_BASE_URL =
    'https://google-code-prettify.googlecode.com/svn/loader';

// Inject one <script> per requested language handler.  Each iteration runs in
// its own closure so that every handler below keeps its own `script` element.
for (var i = 0, n = langs.length; i < n; ++i) (function (lang) {
  var script = doc.createElement("script");

  // Excerpted from jQuery.ajaxTransport("script") to fire events when
  // a script is finished loading.
  // Attach handlers for each script
  script.onload = script.onerror = script.onreadystatechange = function () {
    if (script && (
          !script.readyState || /loaded|complete/.test(script.readyState))) {
      // Handle memory leak in IE
      script.onerror = script.onload = script.onreadystatechange = null;

      // A failed load counts as finished too (onerror shares this handler),
      // so one missing handler cannot leave the count above zero.
      --pendingLanguages;
      checkPendingLanguages();

      // Remove the script
      if (script.parentNode) {
        script.parentNode.removeChild(script);
      }

      // Guards against the handler running a second time for this element.
      script = null;
    }
  };

  script.type = 'text/javascript';
  // Use the closure's own argument rather than reaching back through the
  // enclosing loop index.
  script.src = LOADER_BASE_URL
      + '/lang-' + encodeURIComponent(lang) + '.js';

  // Circumvent IE6 bugs with base elements (#2709 and #4378) by prepending
  head.insertBefore(script, head.firstChild);
})(langs[i]);
209 | |
// Count of requested language handlers that have not yet finished loading.
var pendingLanguages = langs.length;

/**
 * Schedules onLangsLoaded on a zero-delay timer when no language handlers
 * remain outstanding; otherwise does nothing.
 */
function checkPendingLanguages() {
  if (pendingLanguages) { return; }
  setTimeout(onLangsLoaded, 0);
}
216 | |
// Candidate stylesheet URLs: each requested skin in order, followed by the
// default prettify.css as the last resort.
var skinUrls = [];
var skinsBaseUrl = LOADER_BASE_URL + '/skins/';
for (var i = 0, n = skins.length; i < n; ++i) {
  var skinFile = encodeURIComponent(skins[i]) + '.css';
  skinUrls[skinUrls.length] = skinsBaseUrl + skinFile;
}
skinUrls[skinUrls.length] = LOADER_BASE_URL + '/prettify.css';
loadStylesheetsFallingBack(skinUrls);
224 | |
225 var prettyPrint = (function () { | |
226 // Copyright (C) 2006 Google Inc. | |
227 // | |
228 // Licensed under the Apache License, Version 2.0 (the "License"); | |
229 // you may not use this file except in compliance with the License. | |
230 // You may obtain a copy of the License at | |
231 // | |
232 // http://www.apache.org/licenses/LICENSE-2.0 | |
233 // | |
234 // Unless required by applicable law or agreed to in writing, software | |
235 // distributed under the License is distributed on an "AS IS" BASIS, | |
236 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
237 // See the License for the specific language governing permissions and | |
238 // limitations under the License. | |
239 | |
240 | |
241 /** | |
242 * @fileoverview | |
243 * some functions for browser-side pretty printing of code contained in html
. | |
244 * | |
245 * <p> | |
246 * For a fairly comprehensive set of languages see the | |
247 * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html
#langs">README</a> | |
248 * file that came with this source. At a minimum, the lexer should work on
a | |
249 * number of languages including C and friends, Java, Python, Bash, SQL, HTM
L, | |
250 * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and
Awk | |
251 * and a subset of Perl, but, because of commenting conventions, doesn't wor
k on | |
252 * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang cla
ss. | |
253 * <p> | |
254 * Usage: <ol> | |
255 * <li> include this source file in an html page via | |
256 * {@code <script type="text/javascript" src="/path/to/prettify.js"></scri
pt>} | |
257 * <li> define style rules. See the example page for examples. | |
258 * <li> mark the {@code <pre>} and {@code <code>} tags in your source with | |
259 * {@code class=prettyprint.} | |
260 * You can also use the (html deprecated) {@code <xmp>} tag, but the pret
ty | |
261 * printer needs to do more substantial DOM manipulations to support that
, so | |
262 * some css styles may not be preserved. | |
263 * </ol> | |
264 * That's it. I wanted to keep the API as simple as possible, so there's no | |
265 * need to specify which language the code is in, but if you wish, you can a
dd | |
266 * another class to the {@code <pre>} or {@code <code>} element to specify t
he | |
267 * language, as in {@code <pre class="prettyprint lang-java">}. Any class t
hat | |
268 * starts with "lang-" followed by a file extension, specifies the file type
. | |
269 * See the "lang-*.js" files in this directory for code that implements | |
270 * per-language file handlers. | |
271 * <p> | |
272 * Change log:<br> | |
273 * cbeust, 2006/08/22 | |
274 * <blockquote> | |
275 * Java annotations (start with "@") are now captured as literals ("lit") | |
276 * </blockquote> | |
277 * @requires console | |
278 */ | |
279 | |
280 // JSLint declarations | |
281 /*global console, document, navigator, setTimeout, window, define */ | |
282 | |
/**
 * Split {@code prettyPrint} into multiple timeouts so as not to interfere with
 * UI events.
 * If set to {@code false}, {@code prettyPrint()} is synchronous.
 * Published as a property of {@code window} and set to true here.
 */
window['PR_SHOULD_USE_CONTINUATION'] = true;

/**
 * Pretty print a chunk of code.
 * Only declared here; the implementation is assigned outside the visible
 * part of the file.
 * @param {string} sourceCodeHtml The HTML to pretty print.
 * @param {string} opt_langExtension The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param {number|boolean} opt_numberLines True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} code as html, but prettier
 */
var prettyPrintOne;
/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 * Only declared here; the implementation is assigned outside the visible
 * part of the file.
 *
 * @param {Function} opt_whenDone called when prettifying is done.
 * @param {HTMLElement|HTMLDocument} opt_root an element or document
 *   containing all the elements to pretty print.
 *   Defaults to {@code document.body}.
 */
var prettyPrint;
310 | |
311 | |
312 (function () { | |
var win = window;
// Keyword lists for various languages.
// We use things that coerce to strings to make them compact when minified
// and to defeat aggressive optimizers that fold large string constants.
//
// Each list is either a comma-separated string or an array whose elements are
// such strings or other lists.  Nesting a list inside another is how one
// language inherits the keywords of another (e.g. C_KEYWORDS contains
// FLOW_CONTROL_KEYWORDS).

// Control-flow words that every array-based list below includes, directly or
// through another list.
var FLOW_CONTROL_KEYWORDS = ["break,continue,do,else,for,if,return,while"];
var C_KEYWORDS = [FLOW_CONTROL_KEYWORDS,"auto,case,char,const,default," +
    "double,enum,extern,float,goto,inline,int,long,register,short,signed," +
    "sizeof,static,struct,switch,typedef,union,unsigned,void,volatile"];
// Base list for the C++, Java and JavaScript keyword lists.
var COMMON_KEYWORDS = [C_KEYWORDS,"catch,class,delete,false,import," +
    "new,operator,private,protected,public,this,throw,true,try,typeof"];
var CPP_KEYWORDS = [COMMON_KEYWORDS,"alignof,align_union,asm,axiom,bool," +
    "concept,concept_map,const_cast,constexpr,decltype,delegate," +
    "dynamic_cast,explicit,export,friend,generic,late_check," +
    "mutable,namespace,nullptr,property,reinterpret_cast,static_assert," +
    "static_cast,template,typeid,typename,using,virtual,where"];
var JAVA_KEYWORDS = [COMMON_KEYWORDS,
    "abstract,assert,boolean,byte,extends,final,finally,implements,import," +
    "instanceof,interface,null,native,package,strictfp,super,synchronized," +
    "throws,transient"];
// C# builds on the Java list rather than directly on COMMON_KEYWORDS.
var CSHARP_KEYWORDS = [JAVA_KEYWORDS,
    "as,base,by,checked,decimal,delegate,descending,dynamic,event," +
    "fixed,foreach,from,group,implicit,in,internal,into,is,let," +
    "lock,object,out,override,orderby,params,partial,readonly,ref,sbyte," +
    "sealed,stackalloc,string,select,uint,ulong,unchecked,unsafe,ushort," +
    "var,virtual,where"];
// Plain string: does not include any other list.
var COFFEE_KEYWORDS = "all,and,by,catch,class,else,extends,false,finally," +
    "for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then," +
    "throw,true,try,unless,until,when,while,yes";
var JSCRIPT_KEYWORDS = [COMMON_KEYWORDS,
    "debugger,eval,export,function,get,null,set,undefined,var,with," +
    "Infinity,NaN"];
// Plain string: does not include any other list.
var PERL_KEYWORDS = "caller,delete,die,do,dump,elsif,eval,exit,foreach,for," +
    "goto,if,import,last,local,my,next,no,our,print,package,redo,require," +
    "sub,undef,unless,until,use,wantarray,while,BEGIN,END";
var PYTHON_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "and,as,assert,class,def,del," +
    "elif,except,exec,finally,from,global,import,in,is,lambda," +
    "nonlocal,not,or,pass,print,raise,try,with,yield," +
    "False,True,None"];
var RUBY_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "alias,and,begin,case,class," +
    "def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo," +
    "rescue,retry,self,super,then,true,undef,unless,until,when,yield," +
    "BEGIN,END"];
var RUST_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "as,assert,const,copy,drop," +
    "enum,extern,fail,false,fn,impl,let,log,loop,match,mod,move,mut,priv," +
    "pub,pure,ref,self,static,struct,true,trait,type,unsafe,use"];
var SH_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "case,done,elif,esac,eval,fi," +
    "function,in,local,set,then,until"];
// Union of the lists named below.  JAVA_KEYWORDS is reached through
// CSHARP_KEYWORDS; COFFEE_KEYWORDS and RUST_KEYWORDS are not part of it.
var ALL_KEYWORDS = [
    CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
    PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS];
// Anchored pattern for a handful of C/C++ library type names
// (DIR, FILE, containers, iterators, sized int/float names).
var C_TYPES = /^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float)\d*)\b/;
364 | |
// Token style names.  They correspond to CSS classes.
/**
 * Token style for a string literal.
 * @const
 */
var PR_STRING = 'str';
/**
 * Token style for a keyword.
 * @const
 */
var PR_KEYWORD = 'kwd';
/**
 * Token style for a comment.
 * @const
 */
var PR_COMMENT = 'com';
/**
 * Token style for a type.
 * @const
 */
var PR_TYPE = 'typ';
/**
 * Token style for a literal value.  e.g. 1, null, true.
 * @const
 */
var PR_LITERAL = 'lit';
/**
 * Token style for a punctuation string.
 * @const
 */
var PR_PUNCTUATION = 'pun';
/**
 * Token style for plain text.
 * @const
 */
var PR_PLAIN = 'pln';

/**
 * Token style for an sgml tag.
 * @const
 */
var PR_TAG = 'tag';
/**
 * Token style for a markup declaration such as a DOCTYPE.
 * @const
 */
var PR_DECLARATION = 'dec';
/**
 * Token style for embedded source.
 * @const
 */
var PR_SOURCE = 'src';
/**
 * Token style for an sgml attribute name.
 * @const
 */
var PR_ATTRIB_NAME = 'atn';
/**
 * Token style for an sgml attribute value.
 * @const
 */
var PR_ATTRIB_VALUE = 'atv';

/**
 * A class that indicates a section of markup that is not code, e.g. to allow
 * embedding of line numbers within code listings.
 * @const
 */
var PR_NOCODE = 'nocode';
434 | |
435 | |
436 | |
/**
 * A set of tokens that can precede a regular expression literal in
 * javascript, written as regular-expression source text: one non-capturing
 * alternation of punctuation and keywords followed by optional whitespace.
 * http://web.archive.org/web/20070717142515/http://www.mozilla.org/js/language/js20/rationale/syntax.html
 * has the full list, but I've removed ones that might be problematic when
 * seen in languages that don't support regular expression literals.
 *
 * <p>Specifically, I've removed any keywords that can't precede a regexp
 * literal in a syntactically legal javascript program, and I've removed the
 * "in" keyword since it's not a keyword in many languages, and might be used
 * as a count of inches.
 *
 * <p>The link above does not accurately describe EcmaScript rules since
 * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
 * very well in practice.
 *
 * @private
 * @const
 */
var REGEXP_PRECEDER_PATTERN = '(?:^^\\.?|[+-]|[!=]=?=?|\\#|%=?|&&?=?|\\(|\\*=?|[+\\-]=|->|\\/=?|::?|<<?=?|>>?>?=?|,|;|\\?|@|\\[|~|{|\\^\\^?=?|\\|\\|?=?|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*';
457 | |
458 // CAVEAT: this does not properly handle the case where a regular | |
459 // expression immediately follows another since a regular expression may | |
460 // have flags for case-sensitivity and the like. Having regexp tokens | |
461 // adjacent is not valid in any language I'm aware of, so I'm punting. | |
462 // TODO: maybe style special characters inside a regexp as punctuation. | |
463 | |
/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExp.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 *
 * <p>Each input is rewritten before being joined with '|': a leading-anchor
 * '^' is dropped (a doubled '^^' is left alone), capturing groups that no
 * back-reference uses become non-capturing, the remaining groups and their
 * back-references are renumbered across the whole union, and, when
 * case-sensitive and case-insensitive inputs are mixed, the letters of the
 * case-insensitive ones are expanded into explicit character sets.
 *
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any input regex is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Number of capturing groups emitted so far across all rewritten inputs.
  var capturedGroupIndex = 0;

  // Decide whether the union can simply carry the 'i' flag.  That holds only
  // while every input seen is case-insensitive or contains no letters outside
  // of escape sequences.  The first case-sensitive input with such letters
  // forces per-letter folding of the case-insensitive inputs instead.
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
                   /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  // Code units for the single-letter escapes recognised by decodeEscape.
  var escapeCharToCodeUnit = {
    'b': 8,
    't': 9,
    'n': 0xa,
    'v': 0xb,
    'f': 0xc,
    'r': 0xd
  };

  // Returns the code unit denoted by one part of a character set: either a
  // literal character or a \-escape (letter, octal, \xHH, \uHHHH, or an
  // escaped literal).
  function decodeEscape(charsetPart) {
    var cc0 = charsetPart.charCodeAt(0);
    if (cc0 !== 92 /* \\ */) {
      return cc0;
    }
    var c1 = charsetPart.charAt(1);
    cc0 = escapeCharToCodeUnit[c1];
    if (cc0) {
      return cc0;
    } else if ('0' <= c1 && c1 <= '7') {
      return parseInt(charsetPart.substring(1), 8);
    } else if (c1 === 'u' || c1 === 'x') {
      return parseInt(charsetPart.substring(2), 16);
    } else {
      return charsetPart.charCodeAt(1);
    }
  }

  // Returns regex source for one code unit inside a character set: \xHH for
  // code units below 0x20, a backslash-escaped form for \ - ] ^, and the
  // character itself otherwise.
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    return (ch === '\\' || ch === '-' || ch === ']' || ch === '^')
        ? "\\" + ch : ch;
  }

  // Rewrites the source of one bracketed character set so that it matches
  // both cases of every ASCII letter it covers.
  function caseFoldCharset(charSet) {
    // Tokenize the text between the brackets into escapes, '-' and literals.
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g'));
    var ranges = [];
    var inverse = charsetParts[0] === '^';

    var out = ['['];
    if (inverse) { out.push('^'); }

    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      if (/\\[bdsw]/i.test(p)) {  // Don't muck with named groups.
        out.push(p);
      } else {
        var start = decodeEscape(p);
        var end;
        // "a-z" form: consume the '-' and the upper bound as well.
        if (i + 2 < n && '-' === charsetParts[i + 1]) {
          end = decodeEscape(charsetParts[i + 2]);
          i += 2;
        } else {
          end = start;
        }
        ranges.push([start, end]);
        // If the range might intersect letters, then expand it.
        // This case handling is too simplistic.
        // It does not deal with non-latin case folding.
        // It works for latin source code identifiers though.
        if (!(end < 65 || start > 122)) {
          // Overlap with A-Z: add the lower-case counterpart (| 32).
          if (!(end < 65 || start > 90)) {
            ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
          }
          // Overlap with a-z: add the upper-case counterpart (& ~32).
          if (!(end < 97 || start > 122)) {
            ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
          }
        }
      }
    }

    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
    var consolidatedRanges = [];
    // Starts empty: lastRange[1] + 1 is then NaN, so the first comparison is
    // false and the first range is always pushed.
    var lastRange = [];
    for (var i = 0; i < ranges.length; ++i) {
      var range = ranges[i];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    for (var i = 0; i < consolidatedRanges.length; ++i) {
      var range = consolidatedRanges[i];
      out.push(encodeEscape(range[0]));
      if (range[1] > range[0]) {
        // NOTE(review): this inner test is always true once the outer test
        // has passed, so a '-' is emitted even for two adjacent code units.
        if (range[1] + 1 > range[0]) { out.push('-'); }
        out.push(encodeEscape(range[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  // Returns the rewritten source of one input regex; see the function header
  // for the list of rewrites.  Advances capturedGroupIndex as a side effect.
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        // Non-numeric escapes give NaN here and are skipped by the test below.
        var decimalValue = +p.substring(1);
        if (decimalValue) {
          if (decimalValue <= groupIndex) {
            capturedGroups[decimalValue] = -1;
          } else {
            // Replace with an unambiguous escape sequence so that
            // an octal escape sequence does not turn into a backreference
            // to a capturing group from an earlier regex.
            parts[i] = encodeEscape(decimalValue);
          }
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (var i = 1; i < capturedGroups.length; ++i) {
      if (-1 === capturedGroups[i]) {
        capturedGroups[i] = ++capturedGroupIndex;
      }
    }
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        ++groupIndex;
        if (!capturedGroups[groupIndex]) {
          parts[i] = '(?:';
        }
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          parts[i] = '\\' + capturedGroups[decimalValue];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (var i = 0; i < n; ++i) {
      if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (var i = 0; i < n; ++i) {
        var p = parts[i];
        var ch0 = p.charAt(0);
        if (p.length >= 2 && ch0 === '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[i] = p.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    // Inputs must be plain prefix patterns; the message is the regex itself.
    if (regex.global || regex.multiline) { throw new Error('' + regex); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}
704 | |
705 /** | |
706 * Split markup into a string of source code and an array mapping ranges i
n | |
707 * that string to the text nodes in which they appear. | |
708 * | |
709 * <p> | |
710 * The HTML DOM structure:</p> | |
711 * <pre> | |
712 * (Element "p" | |
713 * (Element "b" | |
714 * (Text "print ")) ; #1 | |
715 * (Text "'Hello '") ; #2 | |
716 * (Element "br") ; #3 | |
717 * (Text " + 'World';")) ; #4 | |
718 * </pre> | |
719 * <p> | |
720 * corresponds to the HTML | |
721 * {@code <p><b>print </b>'Hello '<br> + 'World';</p>}.</p> | |
722 * | |
723 * <p> | |
724 * It will produce the output:</p> | |
725 * <pre> | |
726 * { | |
727 * sourceCode: "print 'Hello '\n + 'World';", | |
728 * // 1 2 | |
729 * // 012345678901234 5678901234567 | |
730 * spans: [0, #1, 6, #2, 14, #3, 15, #4] | |
731 * } | |
732 * </pre> | |
733 * <p> | |
734 * where #1 is a reference to the {@code "print "} text node above, and so | |
735 * on for the other text nodes. | |
736 * </p> | |
737 * | |
738 * <p> | |
739 * The {@code} spans array is an array of pairs. Even elements are the st
art | |
740 * indices of substrings, and odd elements are the text nodes (or BR eleme
nts) | |
741 * that contain the text for those substrings. | |
742 * Substrings continue until the next index or the end of the source. | |
743 * </p> | |
744 * | |
745 * @param {Node} node an HTML DOM subtree containing source-code. | |
746 * @param {boolean} isPreformatted true if white-space in text nodes shoul
d | |
747 * be considered significant. | |
748 * @return {Object} source code and the text nodes in which they occur. | |
749 */ | |
function extractSourceSpans(node, isPreformatted) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;

  // Text pieces in document order; joined to form the source code.
  var pieces = [];
  // Flat list of (start offset, owning node) pairs, one pair per piece.
  var spans = [];
  // Length of the source text collected so far.
  var offset = 0;

  // Depth-first walk that records every text/CDATA node, plus a newline for
  // each BR and LI element once its children have been visited.
  function visit(current) {
    var kind = current.nodeType;

    if (kind == 1) {  // Element
      // Subtrees marked "nocode" contribute nothing.
      if (nocode.test(current.className)) { return; }
      var child = current.firstChild;
      while (child) {
        visit(child);
        child = child.nextSibling;
      }
      var tag = current.nodeName.toLowerCase();
      if (tag === 'br' || tag === 'li') {
        pieces.push('\n');
        spans.push(offset, current);
        offset += 1;
      }
      return;
    }

    if (kind != 3 && kind != 4) { return; }  // Neither Text nor CDATA

    var text = current.nodeValue;
    if (!text.length) { return; }
    // TODO: handle tabs here?
    text = isPreformatted
        ? text.replace(/\r\n?/g, '\n')  // Normalize newlines.
        : text.replace(/[ \t\r\n]+/g, ' ');  // Collapse whitespace runs.
    pieces.push(text);
    spans.push(offset, current);
    offset += text.length;
  }

  visit(node);

  var result = {};
  // A single trailing newline is dropped from the joined text.
  result.sourceCode = pieces.join('').replace(/\n$/, '');
  result.spans = spans;
  return result;
}
795 | |
796 /** | |
797 * Apply the given language handler to sourceCode and add the resulting | |
798 * decorations to out. | |
799 * @param {number} basePos the index of sourceCode within the chunk of sou
rce | |
800 * whose decorations are already present on out. | |
801 */ | |
function appendDecorations(basePos, sourceCode, langHandler, out) {
  // Nothing to decorate for empty source; the handler is not invoked.
  if (sourceCode) {
    // The handler receives the job and is expected to leave its result on
    // job.decorations.
    var job = {};
    job.sourceCode = sourceCode;
    job.basePos = basePos;
    langHandler(job);
    // Append all of the produced decorations to out in one call.
    out.push.apply(out, job.decorations);
  }
}
811 | |
/** Matches any string that contains at least one non-space character. */
var notWs = /\S/;

/**
 * Finds the single element child that wraps all of an element's content.
 * <p>
 * The sole child element is returned when {@code element} has exactly one
 * element child and every text node directly under it is blank.  This is
 * what picks out the CODE element in {@code <pre><code ...>}.  When there are
 * several element children, as in
 * {@code <pre><code>...</code><code>...</code></pre>}, when a direct text
 * node has non-space content, or when there is no element child at all,
 * undefined is returned.
 */
function childContentWrapper(element) {
  var soleChild = undefined;
  for (var kid = element.firstChild; kid; kid = kid.nextSibling) {
    switch (kid.nodeType) {
      case 1:  // Element Node
        // A second element child means there is no single wrapper.
        if (soleChild) { return undefined; }
        soleChild = kid;
        break;
      case 3:  // Text Node
        // Significant text directly under element rules out a wrapper.
        if (notWs.test(kid.nodeValue)) { return undefined; }
        break;
      default:
        // Other node kinds do not affect the result.
        break;
    }
  }
  return soleChild;
}
837 | |
/** Given triples of [style, pattern, context] returns a lexing function,
  * The lexing function interprets the patterns to find token boundaries and
  * returns a decoration list of the form
  * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
  * where index_n is an index into the sourceCode, and style_n is a style
  * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
  * all characters in sourceCode[index_n-1:index_n].
  *
  * The stylePatterns is a list whose elements have the form
  * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
  *
  * Style is a style constant like PR_PLAIN, or can be a string of the
  * form 'lang-FOO', where FOO is a language extension describing the
  * language of the portion of the token in $1 after pattern executes.
  * E.g., if style is 'lang-lisp', and group 1 contains the text
  * '(hello (world))', then that portion of the token will be passed to the
  * registered lisp handler for formatting.
  * The text before and after group 1 will be restyled using this decorator
  * so decorators should take care that this doesn't result in infinite
  * recursion.  For example, the HTML lexer rule for SCRIPT elements looks
  * something like ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
  * '<script>foo()<\/script>', which would cause the current decorator to
  * be called with '<script>' which would not match the same rule since
  * group 1 must not be empty, so it would be instead styled as PR_TAG by
  * the generic tag rule.  The handler registered for the 'js' extension would
  * then be called with 'foo()', and finally, the current decorator would
  * be called with '<\/script>' which would not match the original rule and
  * so the generic tag rule would identify it as a tag.
  *
  * Pattern must only match prefixes, and if it matches a prefix, then that
  * match is considered a token with the same style.
  *
  * Context is applied to the last non-whitespace, non-comment token
  * recognized.
  *
  * Shortcut is an optional string of characters, any of which, if the first
  * character, guarantee that this pattern and only this pattern matches.
  *
  * @param {Array} shortcutStylePatterns patterns that always start with
  *   a known character.  Must have a shortcut string.
  * @param {Array} fallthroughStylePatterns patterns that will be tried in
  *   order if the shortcut ones fail.  May have shortcuts.
  *
  * @return {function (Object)} a
  *   function that takes source code and returns a list of decorations.
  */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps a token's first character to the pattern entry that declared that
  // character in its shortcut string.
  var shortcuts = {};
  // Regex used to split source text into tokens; built once below.
  var tokenizer;
  (function () {
    var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
    var allRegexs = [];
    // Set of regex source strings already added, so that each distinct regex
    // is handed to combinePrefixPatterns only once.
    var regexKeys = {};
    for (var i = 0, n = allPatterns.length; i < n; ++i) {
      var patternParts = allPatterns[i];
      var shortcutChars = patternParts[3];
      if (shortcutChars) {
        for (var c = shortcutChars.length; --c >= 0;) {
          shortcuts[shortcutChars.charAt(c)] = patternParts;
        }
      }
      var regex = patternParts[1];
      var k = '' + regex;
      if (!regexKeys.hasOwnProperty(k)) {
        allRegexs.push(regex);
        regexKeys[k] = null;
      }
    }
    // Last alternative: any single code unit.
    // NOTE(review): presumably this guarantees that characters matched by no
    // style pattern still end up in a token -- confirm against
    // combinePrefixPatterns, which is defined outside this block.
    allRegexs.push(/[\0-\uffff]/);
    tokenizer = combinePrefixPatterns(allRegexs);
  })();

  var nPatterns = fallthroughStylePatterns.length;

  /**
   * Lexes job.sourceCode and produces an output array job.decorations of
   * style classes preceded by the position at which they start in
   * job.sourceCode in order.
   *
   * @param {Object} job an object like <pre>{
   *    sourceCode: {string} sourceText plain text,
   *    basePos: {int} position of job.sourceCode in the larger chunk of
   *        sourceCode.
   * }</pre>
   */
  var decorate = function (job) {
    var sourceCode = job.sourceCode, basePos = job.basePos;
    /** Even entries are positions in source in ascending order.  Odd entries
      * are style markers (e.g., PR_COMMENT) that run from that position until
      * the end.
      * @type {Array.<number|string>}
      */
    var decorations = [basePos, PR_PLAIN];
    var pos = 0;  // index into sourceCode
    var tokens = sourceCode.match(tokenizer) || [];
    // Remembers the style chosen for a token's text so that repeated tokens
    // skip pattern matching.  Embedded ('lang-*') tokens are never cached.
    var styleCache = {};

    for (var ti = 0, nTokens = tokens.length; ti < nTokens; ++ti) {
      var token = tokens[ti];
      var style = styleCache[token];
      var match = void 0;

      var isEmbedded;
      if (typeof style === 'string') {
        // Cache hit: cached styles are always non-embedded.
        isEmbedded = false;
      } else {
        // Prefer the shortcut pattern selected by the token's first character.
        var patternParts = shortcuts[token.charAt(0)];
        if (patternParts) {
          match = token.match(patternParts[1]);
          style = patternParts[0];
        } else {
          // Otherwise use the first fallthrough pattern that matches.
          for (var i = 0; i < nPatterns; ++i) {
            patternParts = fallthroughStylePatterns[i];
            match = token.match(patternParts[1]);
            if (match) {
              style = patternParts[0];
              break;
            }
          }

          if (!match) {  // make sure that we make progress
            style = PR_PLAIN;
          }
        }

        isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5);
        // An embedded style is only usable when group 1 actually captured
        // text; otherwise the whole token is styled as PR_SOURCE.
        if (isEmbedded && !(match && typeof match[1] === 'string')) {
          isEmbedded = false;
          style = PR_SOURCE;
        }

        if (!isEmbedded) { styleCache[token] = style; }
      }

      var tokenStart = pos;
      pos += token.length;

      if (!isEmbedded) {
        decorations.push(basePos + tokenStart, style);
      } else {  // Treat group 1 as an embedded block of source code.
        var embeddedSource = match[1];
        var embeddedSourceStart = token.indexOf(embeddedSource);
        var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
        if (match[2]) {
          // If embeddedSource can be blank, then it would match at the
          // beginning which would cause us to infinitely recurse on the
          // entire token, so we catch the right context in match[2].
          embeddedSourceEnd = token.length - match[2].length;
          embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
        }
        // The language name is whatever follows the 'lang-' prefix.
        var lang = style.substring(5);
        // Decorate the left of the embedded source
        appendDecorations(
            basePos + tokenStart,
            token.substring(0, embeddedSourceStart),
            decorate, decorations);
        // Decorate the embedded source
        appendDecorations(
            basePos + tokenStart + embeddedSourceStart,
            embeddedSource,
            langHandlerForExtension(lang, embeddedSource),
            decorations);
        // Decorate the right of the embedded section
        appendDecorations(
            basePos + tokenStart + embeddedSourceEnd,
            token.substring(embeddedSourceEnd),
            decorate, decorations);
      }
    }
    job.decorations = decorations;
  };
  return decorate;
}
1011 | |
/** returns a function that produces a list of decorations from source text.
  *
  * This code treats ", ', and ` as string delimiters, and \ as a string
  * escape.  It does not recognize perl's qq() style strings.
  * It has no special handling for double delimiter escapes as in basic, or
  * the tripled delimiters used in python, but should work on those regardless
  * although in those cases a single string literal may be broken up into
  * multiple adjacent string literals.
  *
  * It recognizes C, C++, and shell style comments.
  *
  * Options read by this function:
  * <ul>
  * <li>tripleQuotedStrings: truthy to recognize '''...''' and """..."""
  *   strings in addition to quoted strings.  Takes precedence over
  *   multiLineStrings.
  * <li>multiLineStrings: truthy to let ', " and ` strings span lines.
  * <li>verbatimStrings: truthy to recognize C# style @"..." strings.
  * <li>hashComments: truthy to treat # as starting a comment.  When
  *   cStyleComments is also set, a value greater than 1 enables ###...###
  *   block comments.
  * <li>cStyleComments: truthy to recognize // and slash-star comments.
  * <li>regexLiterals: truthy to recognize /.../ literals; a value greater
  *   than 1 lets them span lines.
  * <li>types: an optional RegExp whose matches are styled as PR_TYPE.
  * <li>keywords: keyword names separated by commas and/or white-space.
  * </ul>
  *
  * @param {Object} options a set of optional parameters.
  * @return {function (Object)} a function that examines the source code
  *     in the input job and builds the decoration list.
  */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];
  if (options['tripleQuotedStrings']) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options['multiLineStrings']) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  if (options['verbatimStrings']) {
    // verbatim-string-literal production from the C# grammar.  See issue 93.
    fallthroughStylePatterns.push(
        [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
  }
  var hc = options['hashComments'];
  if (hc) {
    if (options['cStyleComments']) {
      if (hc > 1) {  // multiline hash comments
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
      } else {
        // Stop C preprocessor declarations at an unclosed open comment
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:(?:define|e(?:l|nd)if|else|error|ifn?def|include|line|pragma|undef|warning)\b|[^\r\n]*)/,
             null, '#']);
      }
      // #include <stdio.h>
      fallthroughStylePatterns.push(
          [PR_STRING,
           /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h(?:h|pp|\+\+)?|[a-z]\w*)>/,
           null]);
    } else {
      shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
    }
  }
  if (options['cStyleComments']) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  var regexLiterals = options['regexLiterals'];
  if (regexLiterals) {
    /**
     * @const
     */
    var regexExcls = regexLiterals > 1
      ? ''  // Multiline regex literals
      : '\n\r';
    /**
     * @const
     */
    var regexAny = regexExcls ? '.' : '[\\S\\s]';
    /**
     * @const
     */
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '/(?=[^/*' + regexExcls + '])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C' + regexExcls + ']'
        // escape sequences (\x5C),
        +    '|\\x5C' + regexAny
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D' + regexExcls + ']'
        +             '|\\x5C' + regexAny + ')*(?:\\x5D|$))+'
        // finally closed by a /.
        + '/');
    fallthroughStylePatterns.push(
        ['lang-regex',
         RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
         ]);
  }

  var types = options['types'];
  if (types) {
    fallthroughStylePatterns.push([PR_TYPE, types]);
  }

  // Strip one leading and one trailing space, then turn the separators
  // (white-space and commas) into regex alternation.
  var keywords = ("" + options['keywords']).replace(/^ | $/g, '');
  if (keywords.length) {
    fallthroughStylePatterns.push(
        [PR_KEYWORD,
         new RegExp('^(?:' + keywords.replace(/[\s,]+/g, '|') + ')\\b'),
         null]);
  }

  shortcutStylePatterns.push([PR_PLAIN,       /^\s+/, null, ' \r\n\t\xA0']);

  var punctuation =
    // The Bash man page says

    // A word is a sequence of characters considered as a single
    // unit by GRUB. Words are separated by metacharacters,
    // which are the following plus space, tab, and newline: { }
    // | & $ ; < >
    // ...

    // A word beginning with # causes that word and all remaining
    // characters on that line to be ignored.

    // which means that only a '#' after /(?:^|[{}|&$;<>\s])/ starts a
    // comment but empirically
    // $ echo {#}
    // {#}
    // $ echo \$#
    // $#
    // $ echo }#
    // }#

    // so /(?:^|[|&;<>\s])/ is more appropriate.

    // http://gcc.gnu.org/onlinedocs/gcc-2.95.3/cpp_1.html#SEC3
    // suggests that this definition is compatible with a
    // default mode that tries to use a single token definition
    // to recognize both bash/python style comments and C
    // preprocessor directives.

    // This definition of punctuation does not include # in the list of
    // follow-on exclusions, so # will not be broken before if preceded
    // by a punctuation character.  We could try to exclude # after
    // [|&;<>] but that doesn't seem to cause many major problems.
    // If that does turn out to be a problem, we should change the below
    // when hc is truthy to include # in the run of punctuation characters
    // only when not following [|&;<>].
    '^.[^\\s\\w.$@\'"`/\\\\]*';
  if (options['regexLiterals']) {
    // Do not let a punctuation run end right before (optional white-space
    // and) a slash.  The backslashes are doubled because this is a string
    // that is later compiled with new RegExp; with single backslashes the
    // lookahead would compile to (?!s*/), which tests for the letter "s".
    punctuation += '(?!\\s*\\/)';
  }

  fallthroughStylePatterns.push(
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_LITERAL,     /^@[a-z_$][a-z_$@0-9]*/i, null],
      [PR_TYPE,        /^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*|\w+_t\b)/, null],
      [PR_PLAIN,       /^[a-z_$][a-z_$@0-9]*/i, null],
      [PR_LITERAL,
       new RegExp(
           '^(?:'
           // A hex number
           + '0x[a-f0-9]+'
           // or an octal or decimal number, including one that starts with
           // the decimal point such as .5 (the quantifier applies to the
           // digits; it is not a literal plus sign),
           + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d+)'
           // possibly in scientific notation
           + '(?:e[+\\-]?\\d+)?'
           + ')'
           // with an optional modifier like UL for unsigned long
           + '[a-z]*', 'i'),
       null, '0123456789'],
      // Don't treat escaped quotes in bash as starting strings.
      // See issue 144.
      [PR_PLAIN,       /^\\[\s\S]?/, null],
      [PR_PUNCTUATION, new RegExp(punctuation), null]);

  return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
}
1193 | |
// General-purpose decorator built from the combined ALL_KEYWORDS list with
// hash comments, C-style comments, multi-line strings and regex literals all
// enabled.  It is registered below as the 'default-code' handler.
var decorateSource = sourceDecorator({
  'keywords': ALL_KEYWORDS,
  'hashComments': true,
  'cStyleComments': true,
  'multiLineStrings': true,
  'regexLiterals': true
});
1201 | |
/**
 * Given a DOM subtree, wraps it in a list, and puts each line into its own
 * list item.
 *
 * @param {Node} node modified in place.  Its content is pulled into an
 *     HTMLOListElement, and each line is moved into a separate list item.
 *     This requires cloning elements, so the input might not have unique
 *     IDs after numbering.
 * @param {number|boolean|undefined} opt_startLineNum when this is an integer
 *     it is stored as the {@code value} attribute of the first list item and
 *     offsets the {@code L0}..{@code L9} row classes; any other value leaves
 *     the numbering at its default.
 * @param {boolean} isPreformatted true iff white-space in text nodes should
 *     be treated as significant.
 */
function numberLines(node, opt_startLineNum, isPreformatted) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;
  var lineBreak = /\r\n?|\n/;

  var document = node.ownerDocument;

  var li = document.createElement('li');
  while (node.firstChild) {
    li.appendChild(node.firstChild);
  }
  // An array of lines.  We split below, so this is initialized to one
  // un-split line.
  var listItems = [li];

  function walk(node) {
    var type = node.nodeType;
    if (type == 1 && !nocode.test(node.className)) {  // Element
      // nodeName is upper-case ("BR") for elements in HTML documents, so it
      // has to be compared case-insensitively.
      if ('br' === node.nodeName.toLowerCase()) {
        breakAfter(node);
        // Discard the <BR> since it is now flush against a </LI>.
        if (node.parentNode) {
          node.parentNode.removeChild(node);
        }
      } else {
        for (var child = node.firstChild; child; child = child.nextSibling) {
          walk(child);
        }
      }
    } else if ((type == 3 || type == 4) && isPreformatted) {  // Text
      var text = node.nodeValue;
      var match = text.match(lineBreak);
      if (match) {
        // Keep the first line in this node and move the remainder into a new
        // text node, which is visited later as part of the next list item.
        var firstLine = text.substring(0, match.index);
        node.nodeValue = firstLine;
        var tail = text.substring(match.index + match[0].length);
        if (tail) {
          var parent = node.parentNode;
          parent.insertBefore(
              document.createTextNode(tail), node.nextSibling);
        }
        breakAfter(node);
        if (!firstLine) {
          // Don't leave blank text nodes in the DOM.
          node.parentNode.removeChild(node);
        }
      }
    }
  }

  // Split a line after the given node.
  function breakAfter(lineEndNode) {
    // If there's nothing to the right, then we can skip ending the line
    // here, and move root-wards since splitting just before an end-tag
    // would require us to create a bunch of empty copies.
    while (!lineEndNode.nextSibling) {
      lineEndNode = lineEndNode.parentNode;
      if (!lineEndNode) { return; }
    }

    function breakLeftOf(limit, copy) {
      // Clone shallowly if this node needs to be on both sides of the break.
      var rightSide = copy ? limit.cloneNode(false) : limit;
      var parent = limit.parentNode;
      if (parent) {
        // We clone the parent chain.
        // This helps us resurrect important styling elements that cross lines.
        // E.g. in <i>Foo<br>Bar</i>
        // should be rewritten to <li><i>Foo</i></li><li><i>Bar</i></li>.
        var parentClone = breakLeftOf(parent, 1);
        // Move the clone and everything to the right of the original
        // onto the cloned parent.
        var next = limit.nextSibling;
        parentClone.appendChild(rightSide);
        for (var sibling = next; sibling; sibling = next) {
          next = sibling.nextSibling;
          parentClone.appendChild(sibling);
        }
      }
      return rightSide;
    }

    var copiedListItem = breakLeftOf(lineEndNode.nextSibling, 0);

    // Walk the parent chain until we reach an unattached LI.
    for (var parent;
         // Check nodeType since IE invents document fragments.
         (parent = copiedListItem.parentNode) && parent.nodeType === 1;) {
      copiedListItem = parent;
    }
    // Put it on the list of lines for later processing.
    listItems.push(copiedListItem);
  }

  // Split lines while there are lines left to split.
  for (var i = 0;  // Number of lines that have been split so far.
       i < listItems.length;  // length updated by breakAfter calls.
       ++i) {
    walk(listItems[i]);
  }

  // Make sure numeric indices show correctly.
  if (opt_startLineNum === (opt_startLineNum|0)) {
    listItems[0].setAttribute('value', opt_startLineNum);
  }

  var ol = document.createElement('ol');
  ol.className = 'linenums';
  var offset = Math.max(0, ((opt_startLineNum - 1 /* zero index */)) | 0) || 0;
  for (var i = 0, n = listItems.length; i < n; ++i) {
    li = listItems[i];
    // Stick a class on the LIs so that stylesheets can
    // color odd/even rows, or any other row pattern that
    // is co-prime with 10.
    li.className = 'L' + ((i + offset) % 10);
    if (!li.firstChild) {
      // Give empty lines a non-breaking space as content.
      li.appendChild(document.createTextNode('\xA0'));
    }
    ol.appendChild(li);
  }

  node.appendChild(ol);
}
/**
 * Breaks {@code job.sourceCode} around style boundaries in
 * {@code job.decorations} and modifies {@code job.sourceNode} in place.
 * Each styled run of text is wrapped in a SPAN whose class name is the
 * decoration's style.
 * @param {Object} job like <pre>{
 *    sourceCode: {string} source as plain text,
 *    sourceNode: {HTMLElement} the element containing the source,
 *    spans: {Array.<number|Node>} alternating span start indices into source
 *       and the text node or element (e.g. {@code <BR>}) corresponding to that
 *       span.
 *    decorations: {Array.<number|string>} an array of style classes preceded
 *       by the position at which they start in job.sourceCode in order
 * }</pre>
 * @private
 */
function recombineTagsAndDecorations(job) {
  // True only when the user agent string reports MSIE with a major version
  // of 8 or lower.
  var isIE8OrEarlier = /\bMSIE\s(\d+)/.exec(navigator.userAgent);
  isIE8OrEarlier = isIE8OrEarlier && +isIE8OrEarlier[1] <= 8;
  var newlineRe = /\n/g;

  var source = job.sourceCode;
  var sourceLength = source.length;
  // Index into source after the last code-unit recombined.
  var sourceIndex = 0;

  var spans = job.spans;
  var nSpans = spans.length;
  // Index into spans after the last span which ends at or before sourceIndex.
  var spanIndex = 0;

  var decorations = job.decorations;
  var nDecorations = decorations.length;
  // Index into decorations after the last decoration which ends at or before
  // sourceIndex.
  var decorationIndex = 0;

  // Remove all zero-length decorations.
  // The sentinel written here lets the last real decoration be compared
  // against the end of the source.
  decorations[nDecorations] = sourceLength;
  var decPos, i;
  for (i = decPos = 0; i < nDecorations;) {
    if (decorations[i] !== decorations[i + 2]) {
      decorations[decPos++] = decorations[i++];
      decorations[decPos++] = decorations[i++];
    } else {
      i += 2;
    }
  }
  nDecorations = decPos;

  // Simplify decorations.
  for (i = decPos = 0; i < nDecorations;) {
    var startPos = decorations[i];
    // Conflate all adjacent decorations that use the same style.
    var startDec = decorations[i + 1];
    var end = i + 2;
    while (end + 2 <= nDecorations && decorations[end + 1] === startDec) {
      end += 2;
    }
    decorations[decPos++] = startPos;
    decorations[decPos++] = startDec;
    i = end;
  }

  // Truncate the array to the compacted entries (this also drops the
  // sentinel written above).
  nDecorations = decorations.length = decPos;

  // Hide the source node while its children are rewritten; the previous
  // display value is restored in the finally block below.
  var sourceNode = job.sourceNode;
  var oldDisplay;
  if (sourceNode) {
    oldDisplay = sourceNode.style.display;
    sourceNode.style.display = 'none';
  }
  try {
    var decoration = null;
    while (spanIndex < nSpans) {
      var spanStart = spans[spanIndex];
      // Past the last span/decoration the lookup is undefined, so the end of
      // the source is used instead.
      var spanEnd = spans[spanIndex + 2] || sourceLength;

      var decEnd = decorations[decorationIndex + 2] || sourceLength;

      // Process up to whichever boundary comes first.
      var end = Math.min(spanEnd, decEnd);

      var textNode = spans[spanIndex + 1];
      var styledText;
      if (textNode.nodeType !== 1  // Don't muck with <BR>s or <LI>s
          // Don't introduce spans around empty text nodes.
          && (styledText = source.substring(sourceIndex, end))) {
        // This may seem bizarre, and it is.  Emitting LF on IE causes the
        // code to display with spaces instead of line breaks.
        // Emitting Windows standard issue linebreaks (CRLF) causes a blank
        // space to appear at the beginning of every line but the first.
        // Emitting an old Mac OS 9 line separator makes everything spiffy.
        if (isIE8OrEarlier) {
          styledText = styledText.replace(newlineRe, '\r');
        }
        // Reuse the existing text node for the styled run and wrap it in a
        // new SPAN that takes the node's place in the tree.
        textNode.nodeValue = styledText;
        var document = textNode.ownerDocument;
        var span = document.createElement('span');
        span.className = decorations[decorationIndex + 1];
        var parentNode = textNode.parentNode;
        parentNode.replaceChild(span, textNode);
        span.appendChild(textNode);
        if (sourceIndex < spanEnd) {  // Split off a text node.
          // The rest of this span's text goes into a fresh text node placed
          // right after the SPAN; it becomes the node for this span entry.
          spans[spanIndex + 1] = textNode
              // TODO: Possibly optimize by using '' if there's no flicker.
              = document.createTextNode(source.substring(end, spanEnd));
          parentNode.insertBefore(textNode, span.nextSibling);
        }
      }

      sourceIndex = end;

      // Advance whichever of the two cursors has been fully consumed.
      if (sourceIndex >= spanEnd) {
        spanIndex += 2;
      }
      if (sourceIndex >= decEnd) {
        decorationIndex += 2;
      }
    }
  } finally {
    if (sourceNode) {
      sourceNode.style.display = oldDisplay;
    }
  }
}
1458 | |
/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};

/** Register a language handler for the given file extensions.
  * An extension that already has a handler keeps it; in that case a warning
  * is written to the console when one is available.
  * @param {function (Object)} handler a function from source code to a list
  *      of decorations.  Takes a single argument job which describes the
  *      state of the computation.   The single parameter has the form
  *      {@code {
  *        sourceCode: {string} as plain text.
  *        decorations: {Array.<number|string>} an array of style classes
  *                     preceded by the position at which they start in
  *                     job.sourceCode in order.
  *                     The language handler should assign this field.
  *        basePos: {int} the position of source in the larger source chunk.
  *                 All positions in the output decorations array are relative
  *                 to the larger source chunk.
  *      } }
  * @param {Array.<string>} fileExtensions
  */
function registerLangHandler(handler, fileExtensions) {
  // Walk the list from its last entry to its first.
  var idx = fileExtensions.length;
  while (idx-- > 0) {
    var ext = fileExtensions[idx];
    if (langHandlerRegistry.hasOwnProperty(ext)) {
      if (win['console']) {
        console['warn']('cannot override language handler %s', ext);
      }
      continue;
    }
    langHandlerRegistry[ext] = handler;
  }
}

/**
 * Looks up the handler registered for an extension.
 * @param {string=} extension the language extension to look up.
 * @param {string} source the text to be decorated; only consulted when
 *     extension is missing or has no registered handler.
 * @return {function (Object)} the registered handler, or a default one.
 */
function langHandlerForExtension(extension, source) {
  var isRegistered =
      extension && langHandlerRegistry.hasOwnProperty(extension);
  if (!isRegistered) {
    // Treat the source as markup when its first non-whitespace character
    // is a <, and as generic code otherwise.
    extension = /^\s*</.test(source) ? 'default-markup' : 'default-code';
  }
  return langHandlerRegistry[extension];
}
1497 registerLangHandler(decorateSource, ['default-code']); | |
1498 registerLangHandler( | |
1499 createSimpleLexer( | |
1500 [], | |
1501 [ | |
1502 [PR_PLAIN, /^[^<?]+/], | |
1503 [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/], | |
1504 [PR_COMMENT, /^<\!--[\s\S]*?(?:-\->|$)/], | |
1505 // Unescaped content in an unknown language | |
1506 ['lang-', /^<\?([\s\S]+?)(?:\?>|$)/], | |
1507 ['lang-', /^<%([\s\S]+?)(?:%>|$)/], | |
1508 [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/], | |
1509 ['lang-', /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i], | |
1510 // Unescaped content in javascript. (Or possibly vbscript). | |
1511 ['lang-js', /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i
], | |
1512 // Contains unescaped stylesheet content | |
1513 ['lang-css', /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i], | |
1514 ['lang-in.tag', /^(<\/?[a-z][^<>]*>)/i] | |
1515 ]), | |
1516 ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']); | |
1517 registerLangHandler( | |
1518 createSimpleLexer( | |
1519 [ | |
1520 [PR_PLAIN, /^[\s]+/, null, ' \t\r\n'], | |
1521 [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\''] | |
1522 ], | |
1523 [ | |
1524 [PR_TAG, /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i], | |
1525 [PR_ATTRIB_NAME, /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i], | |
1526 ['lang-uq.val', /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/], | |
1527 [PR_PUNCTUATION, /^[=<>\/]+/], | |
1528 ['lang-js', /^on\w+\s*=\s*\"([^\"]+)\"/i], | |
1529 ['lang-js', /^on\w+\s*=\s*\'([^\']+)\'/i], | |
1530 ['lang-js', /^on\w+\s*=\s*([^\"\'>\s]+)/i], | |
1531 ['lang-css', /^style\s*=\s*\"([^\"]+)\"/i], | |
1532 ['lang-css', /^style\s*=\s*\'([^\']+)\'/i], | |
1533 ['lang-css', /^style\s*=\s*([^\"\'>\s]+)/i] | |
1534 ]), | |
1535 ['in.tag']); | |
1536 registerLangHandler( | |
1537 createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']); | |
1538 registerLangHandler(sourceDecorator({ | |
1539 'keywords': CPP_KEYWORDS, | |
1540 'hashComments': true, | |
1541 'cStyleComments': true, | |
1542 'types': C_TYPES | |
1543 }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']); | |
1544 registerLangHandler(sourceDecorator({ | |
1545 'keywords': 'null,true,false' | |
1546 }), ['json']); | |
1547 registerLangHandler(sourceDecorator({ | |
1548 'keywords': CSHARP_KEYWORDS, | |
1549 'hashComments': true, | |
1550 'cStyleComments': true, | |
1551 'verbatimStrings': true, | |
1552 'types': C_TYPES | |
1553 }), ['cs']); | |
1554 registerLangHandler(sourceDecorator({ | |
1555 'keywords': JAVA_KEYWORDS, | |
1556 'cStyleComments': true | |
1557 }), ['java']); | |
1558 registerLangHandler(sourceDecorator({ | |
1559 'keywords': SH_KEYWORDS, | |
1560 'hashComments': true, | |
1561 'multiLineStrings': true | |
1562 }), ['bash', 'bsh', 'csh', 'sh']); | |
1563 registerLangHandler(sourceDecorator({ | |
1564 'keywords': PYTHON_KEYWORDS, | |
1565 'hashComments': true, | |
1566 'multiLineStrings': true, | |
1567 'tripleQuotedStrings': true | |
1568 }), ['cv', 'py', 'python']); | |
1569 registerLangHandler(sourceDecorator({ | |
1570 'keywords': PERL_KEYWORDS, | |
1571 'hashComments': true, | |
1572 'multiLineStrings': true, | |
1573 'regexLiterals': 2 // multiline regex literals | |
1574 }), ['perl', 'pl', 'pm']); | |
1575 registerLangHandler(sourceDecorator({ | |
1576 'keywords': RUBY_KEYWORDS, | |
1577 'hashComments': true, | |
1578 'multiLineStrings': true, | |
1579 'regexLiterals': true | |
1580 }), ['rb', 'ruby']); | |
1581 registerLangHandler(sourceDecorator({ | |
1582 'keywords': JSCRIPT_KEYWORDS, | |
1583 'cStyleComments': true, | |
1584 'regexLiterals': true | |
1585 }), ['javascript', 'js']); | |
// NOTE: the option key is spelled 'multiLineStrings' (capital L) to agree
// with the sh, python, perl and ruby registrations.  It was previously
// written 'multilineStrings' here, which does not match the spelling used
// by every other handler and so was presumably ignored by sourceDecorator.
// NOTE(review): confirm against sourceDecorator that enabling the option for
// Rust gives the intended highlighting.
registerLangHandler(sourceDecorator({
  'keywords': COFFEE_KEYWORDS,
  'hashComments': 3,  // ### style block comments
  'cStyleComments': true,
  'multiLineStrings': true,
  'tripleQuotedStrings': true,
  'regexLiterals': true
}), ['coffee']);
registerLangHandler(sourceDecorator({
  'keywords': RUST_KEYWORDS,
  'cStyleComments': true,
  'multiLineStrings': true
}), ['rc', 'rs', 'rust']);
// Handler registered under 'regex': tags its entire input as PR_STRING.
registerLangHandler(
    createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
1601 | |
/**
 * Runs one pretty-printing job: extracts plain text and spans from
 * job.sourceNode, hands the job to the language handler chosen for
 * job.langExtension, and then recombines tags and decorations.
 * Errors raised along the way are logged to the console (when one exists)
 * rather than propagated.
 *
 * @param {Object} job reads langExtension, sourceNode and pre; sourceCode,
 *     spans and basePos are written onto it.
 */
function applyDecorator(job) {
  // Read before entering the try block so that a missing job still throws.
  var requestedExtension = job.langExtension;

  try {
    // Extract tags, and convert the source code to plain text.
    var extracted = extractSourceSpans(job.sourceNode, job.pre);
    /** Plain text. @type {string} */
    var plainText = extracted.sourceCode;
    job.sourceCode = plainText;
    job.spans = extracted.spans;
    job.basePos = 0;

    // Pick the language handler and let it decorate the job.
    var decorate = langHandlerForExtension(requestedExtension, plainText);
    decorate(job);

    // Fold decorations and tags back into the source node, in place.
    recombineTagsAndDecorations(job);
  } catch (err) {
    if (win['console']) {
      console['log'](err && err['stack'] || err);
    }
  }
}
1626 | |
/**
 * Pretty print a chunk of code and return the decorated markup.
 * @param sourceCodeHtml {string} The HTML to pretty print.
 * @param opt_langExtension {string} The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param opt_numberLines {number|boolean} True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} the innerHTML of the scratch pre element after
 *     decoration.
 */
function $prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
  // Assigning innerHTML could cause images to load and onload listeners to
  // fire, e.g. <img onerror="alert(1337)" src="nosuchimage.png">.
  // We assume that the inner HTML is from a trusted source.
  // The markup is wrapped in its own pre tag because IE8 strips newlines
  // from innerHTML when it is injected into a <pre> tag.
  // http://stackoverflow.com/questions/451486/pre-tag-loses-line-breaks-when-setting-innerhtml-in-ie
  // http://stackoverflow.com/questions/195363/inserting-a-newline-into-a-pre-tag-ie-javascript
  var scratch = document.createElement('div');
  scratch.innerHTML = '<pre>' + sourceCodeHtml + '</pre>';
  var preNode = scratch.firstChild;

  if (opt_numberLines) {
    numberLines(preNode, opt_numberLines, true);
  }

  applyDecorator({
    langExtension: opt_langExtension,
    numberLines: opt_numberLines,
    sourceNode: preNode,
    pre: 1
  });
  return preNode.innerHTML;
}
1659 | |
/**
 * Prettify every {@code <pre>}, {@code <code>} and {@code <xmp>} element
 * beneath a root node that either carries {@code class=prettyprint} or is
 * immediately preceded by a {@code <?prettify ...?>} instruction.
 *
 * @param {Function} opt_whenDone called, without arguments, once every
 *     candidate element has been visited.
 * @param {HTMLElement|HTMLDocument} opt_root an element or document
 *     containing all the elements to pretty print.
 *     Defaults to {@code document.body}.
 */
function $prettyPrint(opt_whenDone, opt_root) {
  var root = opt_root || document.body;
  var doc = root.ownerDocument || document;

  // Copy the candidate nodes into a plain array before rewriting any of
  // them.
  var candidateTags = ['pre', 'code', 'xmp'];
  var liveLists = [];
  for (var t = 0; t < candidateTags.length; ++t) {
    liveLists.push(root.getElementsByTagName(candidateTags[t]));
  }
  var elements = [];
  for (var a = 0; a < liveLists.length; ++a) {
    for (var b = 0, count = liveLists[a].length; b < count; ++b) {
      elements.push(liveLists[a][b]);
    }
  }
  liveLists = null;

  // Date.now where available, otherwise an equivalent shim.
  var clock = Date['now']
      ? Date
      : { 'now': function () { return +(new Date); } };

  // Index of the next element to visit; it survives across continuations.
  var k = 0;

  var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/;
  var prettyPrintRe = /\bprettyprint\b/;
  var prettyPrintedRe = /\bprettyprinted\b/;
  var preformattedTagNameRe = /pre|xmp/i;
  var codeRe = /^code$/i;
  var preCodeXmpRe = /^(?:pre|code|xmp)$/i;
  // Sentinel meaning "no <?prettify?> instruction precedes the element".
  var EMPTY = {};

  /**
   * Looks for a preceding comment like
   *   <?prettify lang="..." linenums="..."?>
   * skipping over white-space text nodes but nothing else.
   * @return {Object} the name=value pairs found, or EMPTY.
   */
  function precedingInstructionAttrs(element) {
    var sibling = element;
    while ((sibling = sibling.previousSibling)) {
      var kind = sibling.nodeType;
      // <?foo?> is parsed by HTML 5 to a comment node (8)
      // like <!--?foo?-->, but in XML is a processing instruction (7).
      var text = (kind === 7 || kind === 8) && sibling.nodeValue;
      if (text) {
        if (!/^\??prettify\b/.test(text)) { return EMPTY; }
        var parsed = {};
        text.replace(
            /\b(\w+)=([\w:.%+-]+)/g,
            function (_, key, val) { parsed[key] = val; });
        return parsed;
      }
      if (kind !== 3 || /\S/.test(sibling.nodeValue)) { return EMPTY; }
    }
    return EMPTY;
  }

  /** True if an ancestor pre/code/xmp is itself marked prettyprint. */
  function hasPrettyPrintAncestor(element) {
    for (var anc = element.parentNode; anc; anc = anc.parentNode) {
      if (preCodeXmpRe.test(anc.tagName)
          && anc.className && prettyPrintRe.test(anc.className)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Chooses the language extension: the instruction's lang attribute wins,
   * then a lang-/language- class on the element, then the same class on a
   * wrapped <code> child, as in
   *   <pre class="prettyprint"><code class="language-c">
   * HTML5 recommends the "language-" prefix; both prefixes are accepted.
   * http://dev.w3.org/html5/spec-author-view/the-code-element.html
   */
  function chooseLangExtension(element, attrs, className) {
    var fromAttrs = attrs['lang'];
    if (fromAttrs) { return fromAttrs; }
    var found = className.match(langExtensionRe);
    var wrapper;
    if (!found && (wrapper = childContentWrapper(element))
        && codeRe.test(wrapper.tagName)) {
      found = wrapper.className.match(langExtensionRe);
    }
    return found ? found[1] : found;
  }

  /** Decides whether white-space inside the element is significant. */
  function detectPreformatted(element) {
    if (preformattedTagNameRe.test(element.tagName)) { return 1; }
    var legacyStyle = element['currentStyle'];
    var view = doc.defaultView;
    var whitespace;
    if (legacyStyle) {
      whitespace = legacyStyle['whiteSpace'];
    } else if (view && view.getComputedStyle) {
      whitespace = view.getComputedStyle(element, null)
          .getPropertyValue('white-space');
    } else {
      whitespace = 0;
    }
    return whitespace && 'pre' === whitespace.substring(0, 3);
  }

  /**
   * Resolves line numbering from the instruction's linenums attribute or
   * from a class like linenums or linenums:<n>, where <n> is the
   * 1-indexed number of the first line.
   */
  function chooseLineNums(attrs, className) {
    var requested = attrs['linenums'];
    var fromAttrs = requested === 'true' || +requested;
    if (fromAttrs) { return fromAttrs; }
    var found = className.match(/\blinenums\b(?::(\d+))?/);
    if (!found) { return false; }
    if (found[1] && found[1].length) { return +found[1]; }
    return true;
  }

  /** Prettifies one candidate element if it qualifies. */
  function visit(cs) {
    var attrs = precedingInstructionAttrs(cs);
    var className = cs.className;

    if (attrs === EMPTY && !prettyPrintRe.test(className)) { return; }
    // Don't redo this if we've already done it.  This allows calling
    // prettyPrint again to handle only elements added since the last call.
    if (prettyPrintedRe.test(className)) { return; }
    // Make sure this is not nested in an already prettified element.
    if (hasPrettyPrintAncestor(cs)) { return; }

    // Mark done.  If we fail to prettyprint for whatever reason,
    // we shouldn't try again.
    cs.className += ' prettyprinted';

    var langExtension = chooseLangExtension(cs, attrs, className);
    var preformatted = detectPreformatted(cs);
    var lineNums = chooseLineNums(attrs, className);
    if (lineNums) { numberLines(cs, lineNums, preformatted); }

    // do the pretty printing
    applyDecorator({
      langExtension: langExtension,
      sourceNode: cs,
      numberLines: lineNums,
      pre: preformatted
    });
  }

  // The work is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  function doWork() {
    var deadline = Infinity;
    if (win['PR_SHOULD_USE_CONTINUATION']) {
      deadline = clock['now']() + 250 /* ms */;
    }
    while (k < elements.length && clock['now']() < deadline) {
      visit(elements[k]);
      k++;
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    } else if ('function' === typeof opt_whenDone) {
      opt_whenDone();
    }
  }

  doWork();
}
1828 | |
/**
 * Contains functions for creating and registering new language handlers,
 * the PR_* token style constants, and the two pretty-printing entry points.
 * The same object is published as win['PR'].
 * @type {Object}
 */
var PR = {
  'createSimpleLexer': createSimpleLexer,
  'registerLangHandler': registerLangHandler,
  'sourceDecorator': sourceDecorator,
  'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
  'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
  'PR_COMMENT': PR_COMMENT,
  'PR_DECLARATION': PR_DECLARATION,
  'PR_KEYWORD': PR_KEYWORD,
  'PR_LITERAL': PR_LITERAL,
  'PR_NOCODE': PR_NOCODE,
  'PR_PLAIN': PR_PLAIN,
  'PR_PUNCTUATION': PR_PUNCTUATION,
  'PR_SOURCE': PR_SOURCE,
  'PR_STRING': PR_STRING,
  'PR_TAG': PR_TAG,
  'PR_TYPE': PR_TYPE
};

// Publish the entry points.  With IN_GLOBAL_SCOPE set they are attached to
// win; otherwise prettyPrintOne is stored in the enclosing scope's variable.
// The enclosing prettyPrint variable is assigned in both cases.
if (IN_GLOBAL_SCOPE) {
  PR['prettyPrintOne'] = win['prettyPrintOne'] = $prettyPrintOne;
  PR['prettyPrint'] = prettyPrint = win['prettyPrint'] = $prettyPrint;
} else {
  PR['prettyPrintOne'] = prettyPrintOne = $prettyPrintOne;
  PR['prettyPrint'] = prettyPrint = $prettyPrint;
}

win['PR'] = PR;
1859 | |
// Expose PR through the Asynchronous Module Definition (AMD) API when an
// AMD loader is present.
// Per https://github.com/amdjs/amdjs-api/wiki/AMD a conforming global
// define function SHOULD have a property called "amd" whose value is an
// object; checking for it avoids calling some unrelated define() that other
// JavaScript on the page may have declared.
if (typeof define === "function") {
  if (define['amd']) {
    define("google-code-prettify", [], function () {
      return PR;
    });
  }
}
1877 })(); | |
1878 return prettyPrint; | |
1879 })(); | |
1880 | |
// If this script is deferred or async and the document is already
// loaded we need to wait for language handlers to load before performing
// any autorun.
/**
 * When autorun is enabled, waits for the content to be loaded and then runs
 * prettyPrint.  If callback names were collected in `callbacks`, each
 * win['exports'][name] is invoked afterwards, each in its own zero-delay
 * timeout, with win as `this` and with the arguments that prettyPrint
 * passed to its completion callback.
 */
function onLangsLoaded() {
  if (!autorun) { return; }
  contentLoaded(
      function () {
        var n = callbacks.length;
        var callback = n ? function () {
          // Capture the completion arguments here: inside the timeout
          // function below, `arguments` would refer to the timer callback's
          // own arguments instead.
          var completionArgs = arguments;
          for (var i = 0; i < n; ++i) {
            // The wrapper pins the current index for the deferred call.
            (function (i) {
              setTimeout(
                  function () {
                    win['exports'][callbacks[i]].apply(win, completionArgs);
                  }, 0);
            })(i);
          }
        } : void 0;
        prettyPrint(callback);
      });
}
checkPendingLanguages();
1904 | |
1905 }()); | |
OLD | NEW |