bower_components/google-code-prettify/src/prettify.js - Issue 786953007: npm_modules: Fork bower_components into Polymer 0.4.0 and 0.5.0 versions - Code Review

Chromium Code Reviews

chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out

(169)

My Issues | Starred Open | Closed | All

Side by Side Diff: bower_components/google-code-prettify/src/prettify.js

Issue 786953007: npm_modules: Fork bower_components into Polymer 0.4.0 and 0.5.0 versions (Closed) Base URL: https://chromium.googlesource.com/infra/third_party/npm_modules.git@master

Patch Set: Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « bower_components/google-code-prettify/src/prettify.css ('k') | bower_components/google-code-prettify/src/run_prettify.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1 // Copyright (C) 2006 Google Inc.

2 //

3 // Licensed under the Apache License, Version 2.0 (the "License");

4 // you may not use this file except in compliance with the License.

5 // You may obtain a copy of the License at

6 //

7 // http://www.apache.org/licenses/LICENSE-2.0

8 //

9 // Unless required by applicable law or agreed to in writing, software

10 // distributed under the License is distributed on an "AS IS" BASIS,

11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12 // See the License for the specific language governing permissions and

13 // limitations under the License.

14

15

16 /**

17 * @fileoverview

18 * some functions for browser-side pretty printing of code contained in html.

19 *

20 * <p>

21 * For a fairly comprehensive set of languages see the

22 * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a>

23 * file that came with this source. At a minimum, the lexer should work on a

24 * number of languages including C and friends, Java, Python, Bash, SQL, HTML,

25 * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk

26 * and a subset of Perl, but, because of commenting conventions, doesn't work on

27 * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class.

28 * <p>

29 * Usage: <ol>

30 * <li> include this source file in an html page via

31 * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>}

32 * <li> define style rules. See the example page for examples.

33 * <li> mark the {@code <pre>} and {@code <code>} tags in your source with

34 * {@code class=prettyprint.}

35 * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty

36 * printer needs to do more substantial DOM manipulations to support that, so

37 * some css styles may not be preserved.

38 * </ol>

39 * That's it. I wanted to keep the API as simple as possible, so there's no

40 * need to specify which language the code is in, but if you wish, you can add

41 * another class to the {@code <pre>} or {@code <code>} element to specify the

42 * language, as in {@code <pre class="prettyprint lang-java">}. Any class that

43 * starts with "lang-" followed by a file extension, specifies the file type.

44 * See the "lang-*.js" files in this directory for code that implements

45 * per-language file handlers.

46 * <p>

47 * Change log:<br>

48 * cbeust, 2006/08/22

49 * <blockquote>

50 * Java annotations (start with "@") are now captured as literals ("lit")

51 * </blockquote>

52 * @requires console

53 */

54

// JSLint declarations
/*global console, document, navigator, setTimeout, window, define */

/** @define {boolean} */
var IN_GLOBAL_SCOPE = true;

/**
 * Split {@code prettyPrint} into multiple timeouts so as not to interfere with
 * UI events.
 * If set to {@code false}, {@code prettyPrint()} is synchronous.
 */
window['PR_SHOULD_USE_CONTINUATION'] = true;

/**
 * Pretty print a chunk of code.
 * Declared here and assigned later in the file.
 * @param {string} sourceCodeHtml The HTML to pretty print.
 * @param {string} opt_langExtension The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param {number|boolean} opt_numberLines True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} code as html, but prettier
 */
var prettyPrintOne;
/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 * Declared here and assigned later in the file.
 *
 * @param {Function} opt_whenDone called when prettifying is done.
 * @param {HTMLElement|HTMLDocument} opt_root an element or document
 *     containing all the elements to pretty print.
 *     Defaults to {@code document.body}.
 */
var prettyPrint;

88

89

90 (function () {

91 var win = window;

// Keyword lists for various languages.
// We use things that coerce to strings to make them compact when minified
// and to defeat aggressive optimizers that fold large string constants.
// Each list is either a comma-separated string or an array whose elements
// (possibly nested arrays) coerce to one comma-separated string.
var FLOW_CONTROL_KEYWORDS = ["break,continue,do,else,for,if,return,while"];
var C_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "auto,case,char,const,default," +
    "double,enum,extern,float,goto,inline,int,long,register,short,signed," +
    "sizeof,static,struct,switch,typedef,union,unsigned,void,volatile"];
var COMMON_KEYWORDS = [C_KEYWORDS, "catch,class,delete,false,import," +
    "new,operator,private,protected,public,this,throw,true,try,typeof"];
var CPP_KEYWORDS = [COMMON_KEYWORDS, "alignof,align_union,asm,axiom,bool," +
    "concept,concept_map,const_cast,constexpr,decltype,delegate," +
    "dynamic_cast,explicit,export,friend,generic,late_check," +
    "mutable,namespace,nullptr,property,reinterpret_cast,static_assert," +
    "static_cast,template,typeid,typename,using,virtual,where"];
var JAVA_KEYWORDS = [COMMON_KEYWORDS,
    "abstract,assert,boolean,byte,extends,final,finally,implements,import," +
    "instanceof,interface,null,native,package,strictfp,super,synchronized," +
    "throws,transient"];
var CSHARP_KEYWORDS = [JAVA_KEYWORDS,
    "as,base,by,checked,decimal,delegate,descending,dynamic,event," +
    "fixed,foreach,from,group,implicit,in,internal,into,is,let," +
    "lock,object,out,override,orderby,params,partial,readonly,ref,sbyte," +
    "sealed,stackalloc,string,select,uint,ulong,unchecked,unsafe,ushort," +
    "var,virtual,where"];
var COFFEE_KEYWORDS = "all,and,by,catch,class,else,extends,false,finally," +
    "for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then," +
    "throw,true,try,unless,until,when,while,yes";
var JSCRIPT_KEYWORDS = [COMMON_KEYWORDS,
    "debugger,eval,export,function,get,null,set,undefined,var,with," +
    "Infinity,NaN"];
var PERL_KEYWORDS = "caller,delete,die,do,dump,elsif,eval,exit,foreach,for," +
    "goto,if,import,last,local,my,next,no,our,print,package,redo,require," +
    "sub,undef,unless,until,use,wantarray,while,BEGIN,END";
var PYTHON_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "and,as,assert,class,def,del," +
    "elif,except,exec,finally,from,global,import,in,is,lambda," +
    "nonlocal,not,or,pass,print,raise,try,with,yield," +
    "False,True,None"];
var RUBY_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "alias,and,begin,case,class," +
    "def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo," +
    "rescue,retry,self,super,then,true,undef,unless,until,when,yield," +
    "BEGIN,END"];
var RUST_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "as,assert,const,copy,drop," +
    "enum,extern,fail,false,fn,impl,let,log,loop,match,mod,move,mut,priv," +
    "pub,pure,ref,self,static,struct,true,trait,type,unsafe,use"];
var SH_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "case,done,elif,esac,eval,fi," +
    "function,in,local,set,then,until"];
var ALL_KEYWORDS = [
    CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
    PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS];
// Prefix pattern for common C/C++ type names (containers, iterators, and
// sized int/float names such as uint32).
var C_TYPES = /^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float)\d*)\b/;

142

// token style names.  correspond to css classes
/**
 * token style for a string literal
 * @const
 */
var PR_STRING = 'str';
/**
 * token style for a keyword
 * @const
 */
var PR_KEYWORD = 'kwd';
/**
 * token style for a comment
 * @const
 */
var PR_COMMENT = 'com';
/**
 * token style for a type
 * @const
 */
var PR_TYPE = 'typ';
/**
 * token style for a literal value.  e.g. 1, null, true.
 * @const
 */
var PR_LITERAL = 'lit';
/**
 * token style for a punctuation string.
 * @const
 */
var PR_PUNCTUATION = 'pun';
/**
 * token style for plain text.
 * @const
 */
var PR_PLAIN = 'pln';

/**
 * token style for an sgml tag.
 * @const
 */
var PR_TAG = 'tag';
/**
 * token style for a markup declaration such as a DOCTYPE.
 * @const
 */
var PR_DECLARATION = 'dec';
/**
 * token style for embedded source.
 * @const
 */
var PR_SOURCE = 'src';
/**
 * token style for an sgml attribute name.
 * @const
 */
var PR_ATTRIB_NAME = 'atn';
/**
 * token style for an sgml attribute value.
 * @const
 */
var PR_ATTRIB_VALUE = 'atv';

/**
 * A class that indicates a section of markup that is not code, e.g. to allow
 * embedding of line numbers within code listings.
 * @const
 */
var PR_NOCODE = 'nocode';

212

213

214

/**
 * A set of tokens that can precede a regular expression literal in
 * javascript
 * http://web.archive.org/web/20070717142515/http://www.mozilla.org/js/language/js20/rationale/syntax.html
 * has the full list, but I've removed ones that might be problematic when
 * seen in languages that don't support regular expression literals.
 *
 * <p>Specifically, I've removed any keywords that can't precede a regexp
 * literal in a syntactically legal javascript program, and I've removed the
 * "in" keyword since it's not a keyword in many languages, and might be used
 * as a count of inches.
 *
 * <p>The link above does not accurately describe EcmaScript rules since
 * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
 * very well in practice.
 *
 * <p>The value is regular-expression source text (a string, not a RegExp):
 * one non-capturing group of alternatives followed by optional whitespace.
 *
 * @private
 * @const
 */
var REGEXP_PRECEDER_PATTERN = '(?:^^\\.?|[+-]|[!=]=?=?|\\#|%=?|&&?=?|\\(|\\*=?|[+\\-]=|->|\\/=?|::?|<<?=?|>>?>?=?|,|;|\\?|@|\\[|~|{|\\^\\^?=?|\\|\\|?=?|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*';

235

236 // CAVEAT: this does not properly handle the case where a regular

237 // expression immediately follows another since a regular expression may

238 // have flags for case-sensitivity and the like. Having regexp tokens

239 // adjacent is not valid in any language I'm aware of, so I'm punting.

240 // TODO: maybe style special characters inside a regexp as punctuation.

241

/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExp.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 *
 * <p>Each input is rewritten so that it can sit in one alternation with the
 * others: leading anchors are dropped, capturing groups that no
 * back-reference uses become non-capturing, the remaining groups and their
 * back-references are renumbered, and case-insensitive inputs are expanded
 * to explicit character sets when they are mixed with case-sensitive ones.
 *
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any input regex is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Number of capturing groups kept so far, counted across ALL inputs so
  // that group numbers stay unique in the combined expression.
  var capturedGroupIndex = 0;

  // Decide whether the combined regex can simply carry the 'i' flag.  As
  // soon as a case-sensitive input containing letters is seen, the flag is
  // dropped and case-insensitive inputs are folded by hand instead.
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
        /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  var escapeCharToCodeUnit = {
    'b': 8,
    't': 9,
    'n': 0xa,
    'v': 0xb,
    'f': 0xc,
    'r': 0xd
  };

  /**
   * Converts one element of a character set (a literal character or an
   * escape sequence) to its code unit.
   */
  function decodeEscape(charsetPart) {
    var cc0 = charsetPart.charCodeAt(0);
    if (cc0 !== 92 /* \\ */) {
      return cc0;
    }
    var c1 = charsetPart.charAt(1);
    cc0 = escapeCharToCodeUnit[c1];
    if (cc0) {
      return cc0;
    } else if ('0' <= c1 && c1 <= '7') {
      return parseInt(charsetPart.substring(1), 8);
    } else if (c1 === 'u' || c1 === 'x') {
      return parseInt(charsetPart.substring(2), 16);
    } else {
      return charsetPart.charCodeAt(1);
    }
  }

  /**
   * Converts a code unit to text that is safe inside a character set:
   * control characters become \xNN, and \ - ] ^ are backslash-escaped.
   */
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    return (ch === '\\' || ch === '-' || ch === ']' || ch === '^')
        ? "\\" + ch : ch;
  }

  /**
   * Rewrites a character set such as [a-c] so that it matches both cases
   * of any latin letters it contains, e.g. [A-Ca-c].
   */
  function caseFoldCharset(charSet) {
    // match() returns null for an empty set "[]"; treat that as no parts
    // instead of throwing on the index below.
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g')) || [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';

    var out = ['['];
    if (inverse) { out.push('^'); }

    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      if (/\\[bdsw]/i.test(p)) {  // Don't muck with named groups.
        out.push(p);
      } else {
        var start = decodeEscape(p);
        var end;
        if (i + 2 < n && '-' === charsetParts[i + 1]) {
          end = decodeEscape(charsetParts[i + 2]);
          i += 2;
        } else {
          end = start;
        }
        ranges.push([start, end]);
        // If the range might intersect letters, then expand it.
        // This case handling is too simplistic.
        // It does not deal with non-latin case folding.
        // It works for latin source code identifiers though.
        if (!(end < 65 || start > 122)) {
          if (!(end < 65 || start > 90)) {
            // Upper-case portion: add the matching lower-case range.
            ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
          }
          if (!(end < 97 || start > 122)) {
            // Lower-case portion: add the matching upper-case range.
            ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
          }
        }
      }
    }

    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
    var consolidatedRanges = [];
    // Starts empty: lastRange[1] is undefined, so the first comparison is
    // false and the first range is always pushed.
    var lastRange = [];
    for (var j = 0; j < ranges.length; ++j) {
      var range = ranges[j];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    for (var k = 0; k < consolidatedRanges.length; ++k) {
      var span = consolidatedRanges[k];
      out.push(encodeEscape(span[0]));
      if (span[1] > span[0]) {
        // The original guarded this '-' with a test that is always true in
        // this branch, so the dash is emitted unconditionally.
        out.push('-');
        out.push(encodeEscape(span[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  /**
   * Returns the source of the given regex rewritten so that it can be
   * embedded in the combined alternation.
   */
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    var i, groupIndex, p, decimalValue;
    for (i = 0, groupIndex = 0; i < n; ++i) {
      p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        decimalValue = +p.substring(1);
        if (decimalValue) {
          if (decimalValue <= groupIndex) {
            capturedGroups[decimalValue] = -1;
          } else {
            // Replace with an unambiguous escape sequence so that
            // an octal escape sequence does not turn into a backreference
            // to a capturing group from an earlier regex.
            parts[i] = encodeEscape(decimalValue);
          }
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (i = 1; i < capturedGroups.length; ++i) {
      if (-1 === capturedGroups[i]) {
        capturedGroups[i] = ++capturedGroupIndex;
      }
    }
    for (i = 0, groupIndex = 0; i < n; ++i) {
      p = parts[i];
      if (p === '(') {
        ++groupIndex;
        if (!capturedGroups[groupIndex]) {
          parts[i] = '(?:';
        }
      } else if ('\\' === p.charAt(0)) {
        decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          parts[i] = '\\' + capturedGroups[decimalValue];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (i = 0; i < n; ++i) {
      if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (i = 0; i < n; ++i) {
        p = parts[i];
        var ch0 = p.charAt(0);
        if (p.length >= 2 && ch0 === '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[i] = p.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var r = 0, nRegexs = regexs.length; r < nRegexs; ++r) {
    var input = regexs[r];
    if (input.global || input.multiline) { throw new Error('' + input); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(input) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}

482

/**
 * Split markup into a string of source code and an array mapping ranges in
 * that string to the text nodes in which they appear.
 *
 * <p>
 * The HTML DOM structure:</p>
 * <pre>
 * (Element   "p"
 *   (Element "b"
 *     (Text  "print "))       ; #1
 *   (Text    "'Hello '")      ; #2
 *   (Element "br")            ; #3
 *   (Text    " + 'World';"))  ; #4
 * </pre>
 * <p>
 * corresponds to the HTML
 * {@code <p><b>print </b>'Hello '<br> + 'World';</p>}.</p>
 *
 * <p>
 * It will produce the output:</p>
 * <pre>
 * {
 *   sourceCode: "print 'Hello '\n + 'World';",
 *   spans: [0, #1, 6, #2, 14, #3, 15, #4]
 * }
 * </pre>
 * <p>
 * where #1 is a reference to the {@code "print "} text node above, and so
 * on for the other text nodes.
 * </p>
 *
 * <p>
 * The {@code spans} array is an array of pairs.  Even elements are the start
 * indices of substrings, and odd elements are the text nodes (or BR/LI
 * elements) that contain the text for those substrings.
 * Substrings continue until the next index or the end of the source.
 * Elements whose class attribute contains {@code nocode} are skipped together
 * with their descendants.  A single trailing newline is removed from
 * {@code sourceCode}.
 * </p>
 *
 * @param {Node} node an HTML DOM subtree containing source-code.
 * @param {boolean} isPreformatted true if white-space in text nodes should
 *    be considered significant.
 * @return {Object} source code and the text nodes in which they occur.
 */
function extractSourceSpans(node, isPreformatted) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;

  var chunks = [];
  var length = 0;  // total length of the text collected so far
  var spans = [];
  var k = 0;       // number of chunks collected so far

  // Records one chunk of text and the node it came from.
  function record(text, source) {
    chunks[k] = text;
    spans[k << 1] = length;
    spans[(k << 1) | 1] = source;
    length += text.length;
    ++k;
  }

  function walk(current) {
    var type = current.nodeType;
    if (type === 1) {  // Element
      if (nocode.test(current.className)) { return; }
      for (var child = current.firstChild; child; child = child.nextSibling) {
        walk(child);
      }
      var nodeName = current.nodeName.toLowerCase();
      if ('br' === nodeName || 'li' === nodeName) {
        // Line-breaking elements contribute a newline after their content.
        record('\n', current);
      }
    } else if (type === 3 || type === 4) {  // Text or CDATA section
      var text = current.nodeValue;
      if (text.length) {
        if (!isPreformatted) {
          text = text.replace(/[ \t\r\n]+/g, ' ');
        } else {
          text = text.replace(/\r\n?/g, '\n');  // Normalize newlines.
        }
        // TODO: handle tabs here?
        record(text, current);
      }
    }
  }

  walk(node);

  return {
    sourceCode: chunks.join('').replace(/\n$/, ''),
    spans: spans
  };
}

573

/**
 * Apply the given language handler to sourceCode and add the resulting
 * decorations to out.  Does nothing when sourceCode is empty.
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 * @param {string} sourceCode the text to hand to the language handler.
 * @param {function (Object)} langHandler called with a job object
 *    {@code {sourceCode, basePos}}; it stores its result on
 *    {@code job.decorations}.
 * @param {Array} out the decoration list to append to.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (!sourceCode) { return; }
  var job = {
    sourceCode: sourceCode,
    basePos: basePos
  };
  langHandler(job);
  var decorations = job.decorations;
  if (!decorations) { return; }
  // Append element by element: passing a very long list through
  // out.push.apply() can exceed the engine's argument-count limit.
  for (var i = 0, n = decorations.length; i < n; ++i) {
    out.push(decorations[i]);
  }
}

589

// Matches any string that contains a non-whitespace character.
var notWs = /\S/;

/**
 * Given an element, if it contains only one child element and any text nodes
 * it contains contain only space characters, return the sole child element.
 * Otherwise returns undefined.
 * <p>
 * This is meant to return the CODE element in {@code <pre><code ...>} when
 * there is a single child element that contains all the non-space textual
 * content, but not to return anything where there are multiple child elements
 * as in {@code <pre><code>...</code><code>...</code></pre>} or when there
 * is textual content.
 *
 * @param {Node} element the element whose children are examined.
 * @return {Node|undefined} the sole child element, or undefined.
 */
function childContentWrapper(element) {
  var wrapper;
  for (var c = element.firstChild; c; c = c.nextSibling) {
    var type = c.nodeType;
    if (type === 1) {  // Element Node
      // A second child element disqualifies the whole element.
      if (wrapper) { return undefined; }
      wrapper = c;
    } else if (type === 3 && notWs.test(c.nodeValue)) {  // Text Node
      // Real text next to the child element also disqualifies it.
      return undefined;
    }
    // Other node types (e.g. comments) are ignored.
  }
  return wrapper;
}

615

/** Given triples of [style, pattern, context] returns a lexing function,
 * The lexing function interprets the patterns to find token boundaries and
 * returns a decoration list of the form
 * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
 * where index_n is an index into the sourceCode, and style_n is a style
 * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
 * all characters in sourceCode[index_n-1:index_n].
 *
 * The stylePatterns is a list whose elements have the form
 * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
 *
 * Style is a style constant like PR_PLAIN, or can be a string of the
 * form 'lang-FOO', where FOO is a language extension describing the
 * language of the portion of the token in $1 after pattern executes.
 * E.g., if style is 'lang-lisp', and group 1 contains the text
 * '(hello (world))', then that portion of the token will be passed to the
 * registered lisp handler for formatting.
 * The text before and after group 1 will be restyled using this decorator
 * so decorators should take care that this doesn't result in infinite
 * recursion.  For example, the HTML lexer rule for SCRIPT elements looks
 * something like ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
 * '<script>foo()<\/script>', which would cause the current decorator to
 * be called with '<script>' which would not match the same rule since
 * group 1 must not be empty, so it would be instead styled as PR_TAG by
 * the generic tag rule.  The handler registered for the 'js' extension would
 * then be called with 'foo()', and finally, the current decorator would
 * be called with '<\/script>' which would not match the original rule and
 * so the generic tag rule would identify it as a tag.
 *
 * Pattern must only match prefixes, and if it matches a prefix, then that
 * match is considered a token with the same style.
 *
 * Context is applied to the last non-whitespace, non-comment token
 * recognized.
 *
 * Shortcut is an optional string of characters, any of which, if the first
 * character, guarantee that this pattern and only this pattern matches.
 *
 * @param {Array} shortcutStylePatterns patterns that always start with
 *   a known character.  Must have a shortcut string.
 * @param {Array} fallthroughStylePatterns patterns that will be tried in
 *   order if the shortcut ones fail.  May have shortcuts.
 *
 * @return {function (Object)} a
 *   function that takes source code and returns a list of decorations.
 */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps a token's first character to the single pattern that can match it.
  var shortcuts = {};
  // One global regex that splits source text into tokens.
  var tokenizer;
  (function () {
    var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
    var allRegexs = [];
    // Regexes already added, keyed by their string form, so that each
    // distinct regex is combined only once.
    var regexKeys = {};
    for (var i = 0, n = allPatterns.length; i < n; ++i) {
      var patternParts = allPatterns[i];
      var shortcutChars = patternParts[3];
      if (shortcutChars) {
        for (var c = shortcutChars.length; --c >= 0;) {
          shortcuts[shortcutChars.charAt(c)] = patternParts;
        }
      }
      var regex = patternParts[1];
      var k = '' + regex;
      if (!regexKeys.hasOwnProperty(k)) {
        allRegexs.push(regex);
        regexKeys[k] = null;
      }
    }
    // Catch-all so that every character ends up in some token.
    allRegexs.push(/[\0-\uffff]/);
    tokenizer = combinePrefixPatterns(allRegexs);
  })();

  var nPatterns = fallthroughStylePatterns.length;

  /**
   * Lexes job.sourceCode and produces an output array job.decorations of
   * style classes preceded by the position at which they start in
   * job.sourceCode in order.
   *
   * @param {Object} job an object like <pre>{
   *    sourceCode: {string} sourceText plain text,
   *    basePos: {int} position of job.sourceCode in the larger chunk of
   *        sourceCode.
   * }</pre>
   */
  var decorate = function (job) {
    var sourceCode = job.sourceCode, basePos = job.basePos;
    /** Even entries are positions in source in ascending order.  Odd entries
     * are style markers (e.g., PR_COMMENT) that run from that position until
     * the end.
     * @type {Array.<number|string>}
     */
    var decorations = [basePos, PR_PLAIN];
    var pos = 0;  // index into sourceCode
    var tokens = sourceCode.match(tokenizer) || [];
    // Remembers the style of non-embedded tokens already classified.
    var styleCache = {};

    for (var ti = 0, nTokens = tokens.length; ti < nTokens; ++ti) {
      var token = tokens[ti];
      var style = styleCache[token];
      var match = void 0;

      var isEmbedded;
      if (typeof style === 'string') {
        // Cache hit; only non-embedded styles are ever cached.
        isEmbedded = false;
      } else {
        var patternParts = shortcuts[token.charAt(0)];
        if (patternParts) {
          match = token.match(patternParts[1]);
          style = patternParts[0];
        } else {
          for (var i = 0; i < nPatterns; ++i) {
            patternParts = fallthroughStylePatterns[i];
            match = token.match(patternParts[1]);
            if (match) {
              style = patternParts[0];
              break;
            }
          }

          if (!match) {  // make sure that we make progress
            style = PR_PLAIN;
          }
        }

        isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5);
        if (isEmbedded && !(match && typeof match[1] === 'string')) {
          // A 'lang-*' rule without a usable group 1 has nothing to hand
          // to another handler, so style the token as generic source.
          isEmbedded = false;
          style = PR_SOURCE;
        }

        if (!isEmbedded) { styleCache[token] = style; }
      }

      var tokenStart = pos;
      pos += token.length;

      if (!isEmbedded) {
        decorations.push(basePos + tokenStart, style);
      } else {  // Treat group 1 as an embedded block of source code.
        var embeddedSource = match[1];
        var embeddedSourceStart = token.indexOf(embeddedSource);
        var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
        if (match[2]) {
          // If embeddedSource can be blank, then it would match at the
          // beginning which would cause us to infinitely recurse on the
          // entire token, so we catch the right context in match[2].
          embeddedSourceEnd = token.length - match[2].length;
          embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
        }
        var lang = style.substring(5);
        // Decorate the left of the embedded source
        appendDecorations(
            basePos + tokenStart,
            token.substring(0, embeddedSourceStart),
            decorate, decorations);
        // Decorate the embedded source
        appendDecorations(
            basePos + tokenStart + embeddedSourceStart,
            embeddedSource,
            langHandlerForExtension(lang, embeddedSource),
            decorations);
        // Decorate the right of the embedded section
        appendDecorations(
            basePos + tokenStart + embeddedSourceEnd,
            token.substring(embeddedSourceEnd),
            decorate, decorations);
      }
    }
    job.decorations = decorations;
  };
  return decorate;
}

789

790 /** Returns a function that produces a list of decorations from source text.

791 *

792 * This code treats ", ', and ` as string delimiters, and \ as a string

793 * escape. It does not recognize perl's qq() style strings.

794 * It has no special handling for double delimiter escapes as in basic, or

795 * the tripled delimiters used in python, but should work on those regardless

796 * although in those cases a single string literal may be broken up into

797 * multiple adjacent string literals.

798 *

799 * It recognizes C, C++, and shell style comments.

800 *

801 * @param {Object} options a set of optional parameters.

 * The keys read below are:
 * tripleQuotedStrings - adds triple-quoted string patterns; checked first.
 * multiLineStrings - otherwise selects the multi-line quoted string pattern,
 * which also lists the backquote as a delimiter; when neither key is
 * set, the single-line string pattern is used.
 * verbatimStrings - adds the C# verbatim string pattern.
 * hashComments - truthy enables # comments; a value greater than 1 combined
 * with cStyleComments selects the multiline hash comment pattern.
 * cStyleComments - adds the two C style comment patterns; combined with
 * hashComments it also adds preprocessor and include patterns.
 * regexLiterals - truthy adds a lang-regex pattern; a value greater than 1
 * allows multiline regex literals.
 * types - if present, pushed as the pattern for PR_TYPE.
 * keywords - coerced to a string and split on whitespace and commas to
 * build the PR_KEYWORD pattern.
 * NOTE(review): several regex literals below look damaged in this rendering
 * (escaped alternation bars, missing quantifiers); verify against the
 * upstream file before relying on them.

802 * @return {function (Object)} a function that examines the source code

803 * in the input job and builds the decoration list.

804 */

805 function sourceDecorator(options) {

806 var shortcutStylePatterns = [], fallthroughStylePatterns = [];

807 if (options['tripleQuotedStrings']) {

808 // '''multi-line-string''', 'single-line-string', and double-quoted

809 shortcutStylePatterns.push(

810 [PR_STRING, /^(?:\'\'\'(?:[^\'\\]\|\\[\s\S]\|\'{1,2}(?=[^\']))(?:\'\'\ '\|$)\|\"\"\"(?:[^\"\\]\|\\[\s\S]\|\"{1,2}(?=[^\"]))(?:\"\"\"\|$)\|\'(?:[^\\\']\|\\[\s\S])(?:\'\|$)\|\"(?:[^\\\"]\|\\[\s \S])(?:\"\|$))/,

811 null, '\'"']);

812 } else if (options['multiLineStrings']) {

813 // 'multi-line-string', "multi-line-string"

814 shortcutStylePatterns.push(

815 [PR_STRING, /^(?:\'(?:[^\\\']\|\\[\s\S])(?:\'\|$)\|\"(?:[^\\\"]\|\\[\s\S ])(?:\"\|$)\|\`(?:[^\\\`]\|\\[\s\S])*(?:\`\|$))/,

816 null, '\'"`']);

817 } else {

818 // 'single-line-string', "single-line-string"

819 shortcutStylePatterns.push(

820 [PR_STRING,

821 /^(?:\'(?:[^\\\'\r\n]\|\\.)(?:\'\|$)\|\"(?:[^\\\"\r\n]\|\\.)(?:\"\|$))/,

822 null, '"\'']);

823 }

824 if (options['verbatimStrings']) {

825 // verbatim-string-literal production from the C# grammar. See issue 93.

826 fallthroughStylePatterns.push(

827 [PR_STRING, /^@\"(?:[^\"]\|\"\")*(?:\"\|$)/, null]);

828 }

829 var hc = options['hashComments'];

830 if (hc) {

831 if (options['cStyleComments']) {

832 if (hc > 1) { // multiline hash comments

833 shortcutStylePatterns.push(

834 [PR_COMMENT, /^#(?:##(?:[^#]\|#(?!##))(?:###\|$)\|.)/, null, '#']);

835 } else {

836 // Stop C preprocessor declarations at an unclosed open comment

837 shortcutStylePatterns.push(

838 [PR_COMMENT, /^#(?:(?:define\|e(?:l\|nd)if\|else\|error\|ifn?def\|includ e\|line\|pragma\|undef\|warning)\b\|[^\r\n]*)/,

839 null, '#']);

840 }

841 // #include <stdio.h>

842 fallthroughStylePatterns.push(

843 [PR_STRING,

844 /^<(?:(?:(?:\.\.\/)\|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h(?:h\|pp\|\ +\+)?\|[a-z]\w)>/,

845 null]);

846 } else {

847 shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);

848 }

849 }

850 if (options['cStyleComments']) {

851 fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);

852 fallthroughStylePatterns.push(

853 [PR_COMMENT, /^\/\[\s\S]?(?:\*\/\|$)/, null]);

854 }

855 var regexLiterals = options['regexLiterals'];

856 if (regexLiterals) {

857 /**

858 * @const

859 */

860 var regexExcls = regexLiterals > 1

861 ? '' // Multiline regex literals

862 : '\n\r';

863 /**

864 * @const

865 */

866 var regexAny = regexExcls ? '.' : '[\\S\\s]';

867 /**

868 * @const

869 */

870 var REGEX_LITERAL = (

871 // A regular expression literal starts with a slash that is

872 // not followed by * or / so that it is not confused with

873 // comments.

874 '/(?=[^/*' + regexExcls + '])'

875 // and then contains any number of raw characters,

876 + '(?:[^/\\x5B\\x5C' + regexExcls + ']'

877 // escape sequences (\x5C),

878 + '\|\\x5C' + regexAny

879 // or non-nesting character sets (\x5B\x5D);

880 + '\|\\x5B(?:[^\\x5C\\x5D' + regexExcls + ']'

881 + '\|\\x5C' + regexAny + ')*(?:\\x5D\|$))+'

882 // finally closed by a /.

883 + '/');

884 fallthroughStylePatterns.push(

885 ['lang-regex',

886 RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')

887 ]);

888 }

889

890 var types = options['types'];

891 if (types) {

892 fallthroughStylePatterns.push([PR_TYPE, types]);

893 }

894

895 var keywords = ("" + options['keywords']).replace(/^ \| $/g, '');

896 if (keywords.length) {

897 fallthroughStylePatterns.push(

898 [PR_KEYWORD,

899 new RegExp('^(?:' + keywords.replace(/[\s,]+/g, '\|') + ')\\b'),

900 null]);

901 }

902

903 shortcutStylePatterns.push([PR_PLAIN, /^\s+/, null, ' \r\n\t\xA0']);

904

905 var punctuation =

906 // The Bash man page says

907

908 // A word is a sequence of characters considered as a single

909 // unit by GRUB. Words are separated by metacharacters,

910 // which are the following plus space, tab, and newline: { }

911 // | & $ ; < >

912 // ...

913

914 // A word beginning with # causes that word and all remaining

915 // characters on that line to be ignored.

916

917 // which means that only a '#' after /(?:^|[{}|&$;<>\s])/ starts a

918 // comment but empirically

919 // $ echo {#}

920 // {#}

921 // $ echo \$#

922 // $#

923 // $ echo }#

924 // }#

925

926 // so /(?:^|[|&;<>\s])/ is more appropriate.

927

928 // http://gcc.gnu.org/onlinedocs/gcc-2.95.3/cpp_1.html#SEC3

929 // suggests that this definition is compatible with a

930 // default mode that tries to use a single token definition

931 // to recognize both bash/python style comments and C

932 // preprocessor directives.

933

934 // This definition of punctuation does not include # in the list of

935 // follow-on exclusions, so # will not be broken before if preceded

936 // by a punctuation character. We could try to exclude # after

937 // [|&;<>] but that doesn't seem to cause many major problems.

938 // If that does turn out to be a problem, we should change the below

939 // when hc is truthy to include # in the run of punctuation characters

940 // only when not following [|&;<>].

941 '^.[^\\s\\w.$@\'"`/\\\\]*';

942 if (options['regexLiterals']) {

943 punctuation += '(?!\s*\/)';

944 }

945

946 fallthroughStylePatterns.push(

947 // TODO(mikesamuel): recognize non-latin letters and numerals in idents

948 [PR_LITERAL, /^@[a-z_$][a-z_$@0-9]*/i, null],

949 [PR_TYPE, /^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*\|\w+_t\b)/, null],

950 [PR_PLAIN, /^[a-z_$][a-z_$@0-9]*/i, null],

951 [PR_LITERAL,

952 new RegExp(

953 '^(?:'

954 // A hex number

955 + '0x[a-f0-9]+'

956 // or an octal or decimal number,

957 + '\|(?:\\d(?:_\\d+)\\d(?:\\.\\d*)?\|\\.\\d\\+)'

958 // possibly in scientific notation

959 + '(?:e[+\\-]?\\d+)?'

960 + ')'

961 // with an optional modifier like UL for unsigned long

962 + '[a-z]*', 'i'),

963 null, '0123456789'],

964 // Don't treat escaped quotes in bash as starting strings.

965 // See issue 144.

966 [PR_PLAIN, /^\\[\s\S]?/, null],

967 [PR_PUNCTUATION, new RegExp(punctuation), null]);

968

969 return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);

970 }

971

// Options for the general-purpose decorator: every known keyword, both hash
// and C style comments, multi-line strings and regex literals.
var decorateSourceOptions = {};
decorateSourceOptions['keywords'] = ALL_KEYWORDS;
decorateSourceOptions['hashComments'] = true;
decorateSourceOptions['cStyleComments'] = true;
decorateSourceOptions['multiLineStrings'] = true;
decorateSourceOptions['regexLiterals'] = true;

// Decorator built from the options above.
var decorateSource = sourceDecorator(decorateSourceOptions);

979

/**
 * Given a DOM subtree, wraps it in a list, and puts each line into its own
 * list item.
 *
 * @param {Node} node modified in place.  Its content is pulled into an
 *     HTMLOListElement, and each line is moved into a separate list item.
 *     This requires cloning elements, so the input might not have unique
 *     IDs after numbering.
 * @param {number|boolean} opt_startLineNum when this is an integer it is
 *     written to the first list item's value attribute and shifts the
 *     L0..L9 class cycle so that it lines up with the displayed numbers.
 *     Any other value leaves the list numbering at its default start.
 * @param {boolean} isPreformatted true iff white-space in text nodes should
 *     be treated as significant.
 */
function numberLines(node, opt_startLineNum, isPreformatted) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;
  var lineBreak = /\r\n?|\n/;

  var document = node.ownerDocument;

  var li = document.createElement('li');
  while (node.firstChild) {
    li.appendChild(node.firstChild);
  }
  // An array of lines.  We split below, so this is initialized to one
  // un-split line.
  var listItems = [li];

  function walk(node) {
    var type = node.nodeType;
    if (type == 1 && !nocode.test(node.className)) {  // Element
      // nodeName is upper-case for HTML elements in HTML documents and
      // keeps the source case in XML documents, so compare
      // case-insensitively; otherwise <BR> never ends a line.
      if ('br' === node.nodeName.toLowerCase()) {
        breakAfter(node);
        // Discard the <BR> since it is now flush against a </LI>.
        if (node.parentNode) {
          node.parentNode.removeChild(node);
        }
      } else {
        for (var child = node.firstChild; child; child = child.nextSibling) {
          walk(child);
        }
      }
    } else if ((type == 3 || type == 4) && isPreformatted) {  // Text
      var text = node.nodeValue;
      var match = text.match(lineBreak);
      if (match) {
        // Keep the first line in this node and push the remainder into a
        // new sibling, which breakAfter moves onto the next list item.
        var firstLine = text.substring(0, match.index);
        node.nodeValue = firstLine;
        var tail = text.substring(match.index + match[0].length);
        if (tail) {
          var parent = node.parentNode;
          parent.insertBefore(
              document.createTextNode(tail), node.nextSibling);
        }
        breakAfter(node);
        if (!firstLine) {
          // Don't leave blank text nodes in the DOM.
          node.parentNode.removeChild(node);
        }
      }
    }
  }

  // Split a line after the given node.
  function breakAfter(lineEndNode) {
    // If there's nothing to the right, then we can skip ending the line
    // here, and move root-wards since splitting just before an end-tag
    // would require us to create a bunch of empty copies.
    while (!lineEndNode.nextSibling) {
      lineEndNode = lineEndNode.parentNode;
      if (!lineEndNode) { return; }
    }

    function breakLeftOf(limit, copy) {
      // Clone shallowly if this node needs to be on both sides of the break.
      var rightSide = copy ? limit.cloneNode(false) : limit;
      var parent = limit.parentNode;
      if (parent) {
        // We clone the parent chain.
        // This helps us resurrect important styling elements that cross
        // lines.  E.g. in <i>Foo<br>Bar</i>
        // should be rewritten to <li><i>Foo</i></li><li><i>Bar</i></li>.
        var parentClone = breakLeftOf(parent, 1);
        // Move the clone and everything to the right of the original
        // onto the cloned parent.
        var next = limit.nextSibling;
        parentClone.appendChild(rightSide);
        for (var sibling = next; sibling; sibling = next) {
          next = sibling.nextSibling;
          parentClone.appendChild(sibling);
        }
      }
      return rightSide;
    }

    var copiedListItem = breakLeftOf(lineEndNode.nextSibling, 0);

    // Walk the parent chain until we reach an unattached LI.
    for (var parent;
         // Check nodeType since IE invents document fragments.
         (parent = copiedListItem.parentNode) && parent.nodeType === 1;) {
      copiedListItem = parent;
    }
    // Put it on the list of lines for later processing.
    listItems.push(copiedListItem);
  }

  var i;
  // Split lines while there are lines left to split.
  for (i = 0;  // Number of lines that have been split so far.
       i < listItems.length;  // length updated by breakAfter calls.
       ++i) {
    walk(listItems[i]);
  }

  // Make sure numeric indices show correctly.
  if (opt_startLineNum === (opt_startLineNum|0)) {
    listItems[0].setAttribute('value', opt_startLineNum);
  }

  var ol = document.createElement('ol');
  ol.className = 'linenums';
  var offset = Math.max(0, ((opt_startLineNum - 1 /* zero index */)) | 0) || 0;
  for (var n = (i = 0, listItems.length); i < n; ++i) {
    li = listItems[i];
    // Stick a class on the LIs so that stylesheets can
    // color odd/even rows, or any other row pattern that
    // is co-prime with 10.
    li.className = 'L' + ((i + offset) % 10);
    if (!li.firstChild) {
      // Give empty lines a non-breaking space as content.
      li.appendChild(document.createTextNode('\xA0'));
    }
    ol.appendChild(li);
  }

  node.appendChild(ol);
}

1113 /**

1114 * Breaks {@code job.sourceCode} around style boundaries in

1115 * {@code job.decorations} and modifies {@code job.sourceNode} in place.

 * Each styled run of text ends up wrapped in a span element whose class
 * name is the decoration's style class.

1116 * @param {Object} job like <pre>{

1117 * sourceCode: {string} source as plain text,

1118 * sourceNode: {HTMLElement} the element containing the source,

1119 * spans: {Array.<number|Node>} alternating span start indices into source

1120 * and the text node or element (e.g. {@code <BR>}) corresponding to that

1121 * span.

1122 * decorations: {Array.<number|string>} an array of style classes preceded

1123 * by the position at which they start in job.sourceCode in order

1124 * }</pre>

1125 * @private

1126 */

1127 function recombineTagsAndDecorations(job) {

1128 var isIE8OrEarlier = /\bMSIE\s(\d+)/.exec(navigator.userAgent);

1129 isIE8OrEarlier = isIE8OrEarlier && +isIE8OrEarlier[1] <= 8;

1130 var newlineRe = /\n/g;

1131

1132 var source = job.sourceCode;

1133 var sourceLength = source.length;

1134 // Index into source after the last code-unit recombined.

1135 var sourceIndex = 0;

1136

1137 var spans = job.spans;

1138 var nSpans = spans.length;

1139 // Index into spans after the last span which ends at or before sourceIndex.

1140 var spanIndex = 0;

1141

1142 var decorations = job.decorations;

1143 var nDecorations = decorations.length;

1144 // Index into decorations after the last decoration which ends at or before

1145 // sourceIndex.

1146 var decorationIndex = 0;

1147

1148 // Remove all zero-length decorations.

// A sentinel position is appended so the last decoration can be compared
// with the position that follows it.

1149 decorations[nDecorations] = sourceLength;

1150 var decPos, i;

1151 for (i = decPos = 0; i < nDecorations;) {

1152 if (decorations[i] !== decorations[i + 2]) {

1153 decorations[decPos++] = decorations[i++];

1154 decorations[decPos++] = decorations[i++];

1155 } else {

1156 i += 2;

1157 }

1158 }

1159 nDecorations = decPos;

1160

1161 // Simplify decorations.

1162 for (i = decPos = 0; i < nDecorations;) {

1163 var startPos = decorations[i];

1164 // Conflate all adjacent decorations that use the same style.

1165 var startDec = decorations[i + 1];

1166 var end = i + 2;

1167 while (end + 2 <= nDecorations && decorations[end + 1] === startDec) {

1168 end += 2;

1169 }

1170 decorations[decPos++] = startPos;

1171 decorations[decPos++] = startDec;

1172 i = end;

1173 }

1174

// Truncate the array to the compacted length, dropping the sentinel too.

1175 nDecorations = decorations.length = decPos;

1176

1177 var sourceNode = job.sourceNode;

1178 var oldDisplay;

// Hide the source node while its children are rewritten; the previous
// display value is put back in the finally block below.

1179 if (sourceNode) {

1180 oldDisplay = sourceNode.style.display;

1181 sourceNode.style.display = 'none';

1182 }

1183 try {

1184 var decoration = null;

1185 while (spanIndex < nSpans) {

1186 var spanStart = spans[spanIndex];

1187 var spanEnd = spans[spanIndex + 2] \|\| sourceLength;

1188

1189 var decEnd = decorations[decorationIndex + 2] \|\| sourceLength;

1190

// Process up to whichever boundary comes first: end of span or of style.

1191 var end = Math.min(spanEnd, decEnd);

1192

1193 var textNode = spans[spanIndex + 1];

1194 var styledText;

1195 if (textNode.nodeType !== 1 // Don't muck with <BR>s or <LI>s

1196 // Don't introduce spans around empty text nodes.

1197 && (styledText = source.substring(sourceIndex, end))) {

1198 // This may seem bizarre, and it is. Emitting LF on IE causes the

1199 // code to display with spaces instead of line breaks.

1200 // Emitting Windows standard issue linebreaks (CRLF) causes a blank

1201 // space to appear at the beginning of every line but the first.

1202 // Emitting an old Mac OS 9 line separator makes everything spiffy.

1203 if (isIE8OrEarlier) {

1204 styledText = styledText.replace(newlineRe, '\r');

1205 }

1206 textNode.nodeValue = styledText;

1207 var document = textNode.ownerDocument;

1208 var span = document.createElement('span');

1209 span.className = decorations[decorationIndex + 1];

1210 var parentNode = textNode.parentNode;

1211 parentNode.replaceChild(span, textNode);

1212 span.appendChild(textNode);

1213 if (sourceIndex < spanEnd) { // Split off a text node.

1214 spans[spanIndex + 1] = textNode

1215 // TODO: Possibly optimize by using '' if there's no flicker.

1216 = document.createTextNode(source.substring(end, spanEnd));

1217 parentNode.insertBefore(textNode, span.nextSibling);

1218 }

1219 }

1220

1221 sourceIndex = end;

1222

// Advance whichever cursor, or both, has been fully consumed.

1223 if (sourceIndex >= spanEnd) {

1224 spanIndex += 2;

1225 }

1226 if (sourceIndex >= decEnd) {

1227 decorationIndex += 2;

1228 }

1229 }

1230 } finally {

1231 if (sourceNode) {

1232 sourceNode.style.display = oldDisplay;

1233 }

1234 }

1235 }

1236

/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};
/** Register a language handler for the given file extensions.
  * An extension that already has a handler keeps it; a warning is logged
  * instead when a console is available.
  * @param {function (Object)} handler a function from source code to a list
  *      of decorations.  Takes a single argument job which describes the
  *      state of the computation.   The single parameter has the form
  *      {@code {
  *        sourceCode: {string} as plain text.
  *        decorations: {Array.<number|string>} an array of style classes
  *                     preceded by the position at which they start in
  *                     job.sourceCode in order.
  *                     The language handler should assign this field.
  *        basePos: {int} the position of source in the larger source chunk.
  *                 All positions in the output decorations array are relative
  *                 to the larger source chunk.
  *      } }
  * @param {Array.<string>} fileExtensions
  */
function registerLangHandler(handler, fileExtensions) {
  for (var i = fileExtensions.length; --i >= 0;) {
    var ext = fileExtensions[i];
    // Call hasOwnProperty through Object.prototype: the registry is keyed by
    // arbitrary extension names, so an entry called "hasOwnProperty" would
    // otherwise shadow the method and defeat this check.
    if (!Object.prototype.hasOwnProperty.call(langHandlerRegistry, ext)) {
      langHandlerRegistry[ext] = handler;
    } else if (win['console']) {
      console['warn']('cannot override language handler %s', ext);
    }
  }
}
/**
 * Looks up the handler registered for an extension.
 * @param {string} extension the language name or file extension; may be
 *     missing or unregistered.
 * @param {string} source the source text, used only to choose a default.
 * @return {function (Object)} the registered handler, or the
 *     'default-markup' / 'default-code' handler when the extension is
 *     unknown (undefined if that default has not been registered).
 */
function langHandlerForExtension(extension, source) {
  if (!(extension
        && Object.prototype.hasOwnProperty.call(
            langHandlerRegistry, extension))) {
    // Treat it as markup if the first non-whitespace character is a <.
    extension = /^\s*</.test(source)
        ? 'default-markup'
        : 'default-code';
  }
  return langHandlerRegistry[extension];
}

// Handler used by langHandlerForExtension when the extension is unknown and
// the source does not start with a left angle bracket.

1275 registerLangHandler(decorateSource, ['default-code']);

// Markup lexer. Style names starting with lang- mark embedded content whose
// captured group is presumably handed to the handler registered under the
// name after the prefix (confirm against the lexer's embedded-source code).

1276 registerLangHandler(

1277 createSimpleLexer(

1278 [],

1279 [

1280 [PR_PLAIN, /^[^<?]+/],

1281 [PR_DECLARATION, /^<!\w[^>]*(?:>\|$)/],

1282 [PR_COMMENT, /^<\!--[\s\S]*?(?:-\->\|$)/],

1283 // Unescaped content in an unknown language

1284 ['lang-', /^<\?([\s\S]+?)(?:\?>\|$)/],

1285 ['lang-', /^<%([\s\S]+?)(?:%>\|$)/],

1286 [PR_PUNCTUATION, /^(?:<[%?]\|[%?]>)/],

1287 ['lang-', /^<xmp\b[^>]>([\s\S]+?)<\/xmp\b[^>]>/i],

1288 // Unescaped content in javascript. (Or possibly vbscript).

1289 ['lang-js', /^<script\b[^>]>([\s\S]?)(<\/script\b[^>]*>)/i],

1290 // Contains unescaped stylesheet content

1291 ['lang-css', /^<style\b[^>]>([\s\S]?)(<\/style\b[^>]*>)/i],

1292 ['lang-in.tag', /^(<\/?[a-z][^<>]*>)/i]

1293 ]),

1294 ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);

// Lexer for the inside of a tag, registered under the name in.tag.

1295 registerLangHandler(

1296 createSimpleLexer(

1297 [

1298 [PR_PLAIN, /^[\s]+/, null, ' \t\r\n'],

1299 [PR_ATTRIB_VALUE, /^(?:\"[^\"]\"?\|\'[^\']\'?)/, null, '\"\'']

1300 ],

1301 [

1302 [PR_TAG, /^^<\/?[a-z](?:[\w.:-]*\w)?\|\/?>$/i],

1303 [PR_ATTRIB_NAME, /^(?!style[\s=]\|on)[a-z](?:[\w:-]*\w)?/i],

1304 ['lang-uq.val', /^=\s([^>\'\"\s](?:[^>\'\"\s\/]\|\/(?=\s)))/],

1305 [PR_PUNCTUATION, /^[=<>\/]+/],

1306 ['lang-js', /^on\w+\s=\s\"([^\"]+)\"/i],

1307 ['lang-js', /^on\w+\s=\s\'([^\']+)\'/i],

1308 ['lang-js', /^on\w+\s=\s([^\"\'>\s]+)/i],

1309 ['lang-css', /^style\s=\s\"([^\"]+)\"/i],

1310 ['lang-css', /^style\s=\s\'([^\']+)\'/i],

1311 ['lang-css', /^style\s=\s([^\"\'>\s]+)/i]

1312 ]),

1313 ['in.tag']);

// Unquoted attribute values: the whole text is styled as an attribute value.

1314 registerLangHandler(

1315 createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']);

// Per-language decorators built from sourceDecorator options.

1316 registerLangHandler(sourceDecorator({

1317 'keywords': CPP_KEYWORDS,

1318 'hashComments': true,

1319 'cStyleComments': true,

1320 'types': C_TYPES

1321 }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);

1322 registerLangHandler(sourceDecorator({

1323 'keywords': 'null,true,false'

1324 }), ['json']);

1325 registerLangHandler(sourceDecorator({

1326 'keywords': CSHARP_KEYWORDS,

1327 'hashComments': true,

1328 'cStyleComments': true,

1329 'verbatimStrings': true,

1330 'types': C_TYPES

1331 }), ['cs']);

1332 registerLangHandler(sourceDecorator({

1333 'keywords': JAVA_KEYWORDS,

1334 'cStyleComments': true

1335 }), ['java']);

1336 registerLangHandler(sourceDecorator({

1337 'keywords': SH_KEYWORDS,

1338 'hashComments': true,

1339 'multiLineStrings': true

1340 }), ['bash', 'bsh', 'csh', 'sh']);

1341 registerLangHandler(sourceDecorator({

1342 'keywords': PYTHON_KEYWORDS,

1343 'hashComments': true,

1344 'multiLineStrings': true,

1345 'tripleQuotedStrings': true

1346 }), ['cv', 'py', 'python']);

1347 registerLangHandler(sourceDecorator({

1348 'keywords': PERL_KEYWORDS,

1349 'hashComments': true,

1350 'multiLineStrings': true,

1351 'regexLiterals': 2 // multiline regex literals

1352 }), ['perl', 'pl', 'pm']);

1353 registerLangHandler(sourceDecorator({

1354 'keywords': RUBY_KEYWORDS,

1355 'hashComments': true,

1356 'multiLineStrings': true,

1357 'regexLiterals': true

1358 }), ['rb', 'ruby']);

1359 registerLangHandler(sourceDecorator({

1360 'keywords': JSCRIPT_KEYWORDS,

1361 'cStyleComments': true,

1362 'regexLiterals': true

1363 }), ['javascript', 'js']);

// NOTE(review): the key below is spelled multilineStrings (lower-case l),
// while sourceDecorator reads multiLineStrings, so it is not picked up.
// Here tripleQuotedStrings is checked first, so the result is the same.

1364 registerLangHandler(sourceDecorator({

1365 'keywords': COFFEE_KEYWORDS,

1366 'hashComments': 3, // ### style block comments

1367 'cStyleComments': true,

1368 'multilineStrings': true,

1369 'tripleQuotedStrings': true,

1370 'regexLiterals': true

1371 }), ['coffee']);

// NOTE(review): same multilineStrings spelling as above; with this key
// ignored, this handler gets the single-line string pattern. Confirm which
// behavior is intended before renaming the key.

1372 registerLangHandler(sourceDecorator({

1373 'keywords': RUST_KEYWORDS,

1374 'cStyleComments': true,

1375 'multilineStrings': true

1376 }), ['rc', 'rs', 'rust']);

// Content styled as lang-regex is rendered entirely as a string.

1377 registerLangHandler(

1378 createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);

1379

/**
 * Runs one pretty-printing job: extracts plain text and spans from the
 * job's source node, lets the matching language handler compute
 * decorations, then merges them back into the DOM.  Any exception is
 * logged (when a console exists) rather than propagated.
 * @param {Object} job reads langExtension, sourceNode and pre; receives
 *     sourceCode, spans and basePos.
 */
function applyDecorator(job) {
  var requestedLang = job.langExtension;

  try {
    // Pull the tags apart from the text so the lexer sees plain text only.
    var extracted = extractSourceSpans(job.sourceNode, job.pre);
    /** Plain text. @type {string} */
    var plainText = extracted.sourceCode;
    job.sourceCode = plainText;
    job.spans = extracted.spans;
    job.basePos = 0;

    // Pick the language handler and let it fill in the decorations.
    var handler = langHandlerForExtension(requestedLang, plainText);
    handler(job);

    // Weave decorations and original tags back together, rewriting
    // the sourceNode in place.
    recombineTagsAndDecorations(job);
  } catch (err) {
    if (!win['console']) { return; }
    console['log'](err && err['stack'] || err);
  }
}

1404

1405 /**

1406 * Pretty print a chunk of code.

1407 * @param sourceCodeHtml {string} The HTML to pretty print.

1408 * @param opt_langExtension {string} The language name to use.

1409 * Typically, a filename extension like 'cpp' or 'java'.

1410 * @param opt_numberLines {number|boolean} True to number lines,

1411 * or the 1-indexed number of the first line in sourceCodeHtml.

 * @return {string} the inner HTML of the temporary pre element after
 * numbering and decoration have been applied.

1412 */

1413 function $prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {

1414 var container = document.createElement('div');

1415 // This could cause images to load and onload listeners to fire.

1416 // E.g. <img onerror="alert(1337)" src="nosuchimage.png">.

1417 // We assume that the inner HTML is from a trusted source.

1418 // The pre-tag is required for IE8 which strips newlines from innerHTML

1419 // when it is injected into a <pre> tag.

1420 // http://stackoverflow.com/questions/451486/pre-tag-loses-line-breaks-when-setting-innerhtml-in-ie

1421 // http://stackoverflow.com/questions/195363/inserting-a-newline-into-a-pre-tag-ie-javascript

1422 container.innerHTML = '<pre>' + sourceCodeHtml + '</pre>';

// From here on container refers to the pre element, not the wrapping div.

1423 container = container.firstChild;

1424 if (opt_numberLines) {

1425 numberLines(container, opt_numberLines, true);

1426 }

1427

1428 var job = {

1429 langExtension: opt_langExtension,

1430 numberLines: opt_numberLines,

1431 sourceNode: container,

1432 pre: 1

1433 };

1434 applyDecorator(job);

1435 return container.innerHTML;

1436 }

1437

1438 /**

1439 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with

1440 * {@code class=prettyprint} and prettify them.

 * {@code <xmp>} elements are collected as well. An element is also
 * processed when it is preceded by a prettify processing instruction or
 * comment, even without the class. Processed elements get the
 * prettyprinted class added so that later calls skip them.

1441 *

1442 * @param {Function} opt_whenDone called when prettifying is done.

1443 * @param {HTMLElement|HTMLDocument} opt_root an element or document

1444 * containing all the elements to pretty print.

1445 * Defaults to {@code document.body}.

1446 */

1447 function $prettyPrint(opt_whenDone, opt_root) {

1448 var root = opt_root \|\| document.body;

1449 var doc = root.ownerDocument \|\| document;

1450 function byTagName(tn) { return root.getElementsByTagName(tn); }

1451 // fetch a list of nodes to rewrite

1452 var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')];

1453 var elements = [];

1454 for (var i = 0; i < codeSegments.length; ++i) {

1455 for (var j = 0, n = codeSegments[i].length; j < n; ++j) {

1456 elements.push(codeSegments[i][j]);

1457 }

1458 }

1459 codeSegments = null;

1460

// Use Date.now when it exists, otherwise an equivalent built on new Date.

1461 var clock = Date;

1462 if (!clock['now']) {

1463 clock = { 'now': function () { return +(new Date); } };

1464 }

1465

1466 // The loop is broken into a series of continuations to make sure that we

1467 // don't make the browser unresponsive when rewriting a large page.

1468 var k = 0;

1469 var prettyPrintingJob;

1470

1471 var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/;

1472 var prettyPrintRe = /\bprettyprint\b/;

1473 var prettyPrintedRe = /\bprettyprinted\b/;

1474 var preformattedTagNameRe = /pre\|xmp/i;

1475 var codeRe = /^code$/i;

1476 var preCodeXmpRe = /^(?:pre\|code\|xmp)$/i;

// Sentinel meaning that no prettify instruction precedes the element.

1477 var EMPTY = {};

1478

1479 function doWork() {

// When PR_SHOULD_USE_CONTINUATION is set, work for at most 250 ms per call.

1480 var endTime = (win['PR_SHOULD_USE_CONTINUATION'] ?

1481 clock['now']() + 250 /* ms */ :

1482 Infinity);

1483 for (; k < elements.length && clock['now']() < endTime; k++) {

1484 var cs = elements[k];

1485

1486 // Look for a preceding comment like

1487 // <?prettify lang="..." linenums="..."?>

1488 var attrs = EMPTY;

1489 {

1490 for (var preceder = cs; (preceder = preceder.previousSibling);) {

1491 var nt = preceder.nodeType;

1492 // <?foo?> is parsed by HTML 5 to a comment node (8)

1493 // like <!--?foo?-->, but in XML is a processing instruction

1494 var value = (nt === 7 \|\| nt === 8) && preceder.nodeValue;

1495 if (value

1496 ? !/^\??prettify\b/.test(value)

1497 : (nt !== 3 \|\| /\S/.test(preceder.nodeValue))) {

1498 // Skip over white-space text nodes but not others.

1499 break;

1500 }

1501 if (value) {

1502 attrs = {};

1503 value.replace(

1504 /\b(\w+)=([\w:.%+-]+)/g,

1505 function (_, name, value) { attrs[name] = value; });

1506 break;

1507 }

1508 }

1509 }

1510

1511 var className = cs.className;

1512 if ((attrs !== EMPTY \|\| prettyPrintRe.test(className))

1513 // Don't redo this if we've already done it.

1514 // This allows recalling pretty print to just prettyprint elements

1515 // that have been added to the page since last call.

1516 && !prettyPrintedRe.test(className)) {

1517

1518 // make sure this is not nested in an already prettified element

1519 var nested = false;

1520 for (var p = cs.parentNode; p; p = p.parentNode) {

1521 var tn = p.tagName;

1522 if (preCodeXmpRe.test(tn)

1523 && p.className && prettyPrintRe.test(p.className)) {

1524 nested = true;

1525 break;

1526 }

1527 }

1528 if (!nested) {

1529 // Mark done. If we fail to prettyprint for whatever reason,

1530 // we shouldn't try again.

1531 cs.className += ' prettyprinted';

1532

1533 // If the classes includes a language extensions, use it.

1534 // Language extensions can be specified like

1535 // <pre class="prettyprint lang-cpp">

1536 // the language extension "cpp" is used to find a language handler

1537 // as passed to PR.registerLangHandler.

1538 // HTML5 recommends that a language be specified using "language-"

1539 // as the prefix instead. Google Code Prettify supports both.

1540 // http://dev.w3.org/html5/spec-author-view/the-code-element.html

1541 var langExtension = attrs['lang'];

1542 if (!langExtension) {

1543 langExtension = className.match(langExtensionRe);

1544 // Support <pre class="prettyprint"><code class="language-c">

1545 var wrapper;

1546 if (!langExtension && (wrapper = childContentWrapper(cs))

1547 && codeRe.test(wrapper.tagName)) {

1548 langExtension = wrapper.className.match(langExtensionRe);

1549 }

1550

1551 if (langExtension) { langExtension = langExtension[1]; }

1552 }

1553

// Decide whether white-space is significant: always for pre and xmp,
// otherwise when the element's white-space style starts with pre.

1554 var preformatted;

1555 if (preformattedTagNameRe.test(cs.tagName)) {

1556 preformatted = 1;

1557 } else {

1558 var currentStyle = cs['currentStyle'];

1559 var defaultView = doc.defaultView;

1560 var whitespace = (

1561 currentStyle

1562 ? currentStyle['whiteSpace']

1563 : (defaultView

1564 && defaultView.getComputedStyle)

1565 ? defaultView.getComputedStyle(cs, null)

1566 .getPropertyValue('white-space')

1567 : 0);

1568 preformatted = whitespace

1569 && 'pre' === whitespace.substring(0, 3);

1570 }

1571

1572 // Look for a class like linenums or linenums:<n> where <n> is the

1573 // 1-indexed number of the first line.

1574 var lineNums = attrs['linenums'];

1575 if (!(lineNums = lineNums === 'true' \|\| +lineNums)) {

1576 lineNums = className.match(/\blinenums\b(?::(\d+))?/);

1577 lineNums =

1578 lineNums

1579 ? lineNums[1] && lineNums[1].length

1580 ? +lineNums[1] : true

1581 : false;

1582 }

1583 if (lineNums) { numberLines(cs, lineNums, preformatted); }

1584

1585 // do the pretty printing

1586 prettyPrintingJob = {

1587 langExtension: langExtension,

1588 sourceNode: cs,

1589 numberLines: lineNums,

1590 pre: preformatted

1591 };

1592 applyDecorator(prettyPrintingJob);

1593 }

1594 }

1595 }

1596 if (k < elements.length) {

1597 // finish up in a continuation

1598 setTimeout(doWork, 250);

1599 } else if ('function' === typeof opt_whenDone) {

1600 opt_whenDone();

1601 }

1602 }

1603

1604 doWork();

1605 }

1606

1607 /**

1608 * Contains functions for creating and registering new language handlers.

 * The object is also published as the PR property of win. Besides the
 * lexer helpers it carries the PR_* style class constants and the two
 * entry points. Depending on IN_GLOBAL_SCOPE, the entry points are
 * additionally assigned either to properties of win or to the
 * prettyPrintOne and prettyPrint variables declared outside this block.

1609 * @type {Object}

1610 */

1611 var PR = win['PR'] = {

1612 'createSimpleLexer': createSimpleLexer,

1613 'registerLangHandler': registerLangHandler,

1614 'sourceDecorator': sourceDecorator,

1615 'PR_ATTRIB_NAME': PR_ATTRIB_NAME,

1616 'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,

1617 'PR_COMMENT': PR_COMMENT,

1618 'PR_DECLARATION': PR_DECLARATION,

1619 'PR_KEYWORD': PR_KEYWORD,

1620 'PR_LITERAL': PR_LITERAL,

1621 'PR_NOCODE': PR_NOCODE,

1622 'PR_PLAIN': PR_PLAIN,

1623 'PR_PUNCTUATION': PR_PUNCTUATION,

1624 'PR_SOURCE': PR_SOURCE,

1625 'PR_STRING': PR_STRING,

1626 'PR_TAG': PR_TAG,

1627 'PR_TYPE': PR_TYPE,

1628 'prettyPrintOne':

1629 IN_GLOBAL_SCOPE

1630 ? (win['prettyPrintOne'] = $prettyPrintOne)

1631 : (prettyPrintOne = $prettyPrintOne),

// Unlike prettyPrintOne above, the prettyPrint variable is assigned in both
// branches because of the leading assignment.

1632 'prettyPrint': prettyPrint =

1633 IN_GLOBAL_SCOPE

1634 ? (win['prettyPrint'] = $prettyPrint)

1635 : (prettyPrint = $prettyPrint)

1636 };

1637

1638 // Make PR available via the Asynchronous Module Definition (AMD) API.

// The module is registered under the id google-code-prettify with no
// dependencies, and its factory returns PR.

1639 // Per https://github.com/amdjs/amdjs-api/wiki/AMD:

1640 // The Asynchronous Module Definition (AMD) API specifies a

1641 // mechanism for defining modules such that the module and its

1642 // dependencies can be asynchronously loaded.

1643 // ...

1644 // To allow a clear indicator that a global define function (as

1645 // needed for script src browser loading) conforms to the AMD API,

1646 // any global define function SHOULD have a property called "amd"

1647 // whose value is an object. This helps avoid conflict with any

1648 // other existing JavaScript code that could have defined a define()

1649 // function that does not conform to the AMD API.

1650 if (typeof define === "function" && define['amd']) {

1651 define("google-code-prettify", [], function () {

1652 return PR;

1653 });

1654 }

1655 })();

OLD	NEW

« no previous file with comments | « bower_components/google-code-prettify/src/prettify.css ('k') | bower_components/google-code-prettify/src/run_prettify.js » ('j') | no next file with comments »

Powered by Google App Engine

This is Rietveld 408576698