| OLD | NEW |
| (Empty) |
| 1 // Copyright (C) 2009 Google Inc. | |
| 2 // | |
| 3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 4 // you may not use this file except in compliance with the License. | |
| 5 // You may obtain a copy of the License at | |
| 6 // | |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 8 // | |
| 9 // Unless required by applicable law or agreed to in writing, software | |
| 10 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 12 // See the License for the specific language governing permissions and | |
| 13 // limitations under the License. | |
| 14 | |
| 15 | |
| 16 | |
| 17 /** | |
| 18 * @fileoverview | |
| 19 * Registers a language handler for CSS. | |
| 20 * | |
| 21 * | |
| 22 * To use, include prettify.js and this file in your HTML page. | |
| 23 * Then put your code in an HTML tag like | |
| 24 * <pre class="prettyprint lang-css"></pre> | |
| 25 * | |
| 26 * | |
| 27 * http://www.w3.org/TR/CSS21/grammar.html Section G2 defines the lexical | |
| 28 * grammar. This scheme does not recognize keywords containing escapes. | |
| 29 * | |
| 30 * @author mikesamuel@gmail.com | |
| 31 */ | |
| 32 | |
| 33 // This file is a call to a function defined in prettify.js which defines a | |
| 34 // lexical scanner for CSS and maps tokens to styles. | |
| 35 | |
| 36 // The call to PR['registerLangHandler'] is quoted so that Closure Compiler | |
| 37 // will not rename the call, so that language extensions can be | |
| 38 // compiled/minified separately from one another. Other symbols defined in | |
| 39 // prettify.js are similarly quoted. | |
| 40 | |
| 41 // The call is structured thus: | |
| 42 // PR['registerLangHandler']( | |
| 43 // PR['createSimpleLexer']( | |
| 44 // shortcutPatterns, | |
| 45 // fallThroughPatterns), | |
| 46 // [languageId0, ..., languageIdN]) | |
| 47 | |
| 48 // Language IDs | |
| 49 // ============= | |
| 50 // The language IDs are typically the file extensions of source files for | |
| 51 // that language so that users can syntax highlight arbitrary files based | |
| 52 // on just the extension. This is heuristic, but works pretty well in | |
| 53 // practice. | |
| 54 | |
| 55 // Patterns | |
| 56 // ======== | |
| 57 // Lexers are typically implemented as a set of regular expressions. | |
| 58 // The SimpleLexer function takes regular expressions, styles, and some | |
| 59 // pragma-info and produces a lexer. A token description looks like | |
| 60 // [STYLE_NAME, /regular-expression/, pragmas] | |
| 61 | |
| 62 // Initially, simple lexer's inner loop looked like: | |
| 63 | |
| 64 // while sourceCode is not empty: | |
| 65 // try each regular expression in order until one matches | |
| 66 // remove the matched portion from sourceCode | |
| 67 | |
| 68 // This was really slow for large files because some JS interpreters | |
| 69 // do a buffer copy on the matched portion which is O(n*n) | |
| 70 | |
| 71 // The current loop now looks like | |
| 72 | |
| 73 // 1. use js-modules/combinePrefixPatterns.js to | |
| 74 // combine all regular expressions into one | |
| 75 // 2. use a single global regular expression match to extract all tokens | |
| 76 // 3. for each token try regular expressions in order until one matches it | |
| 77 // and classify it using the associated style | |
| 78 | |
| 79 // This is a lot more efficient but it does mean that lookahead and lookbehind | |
| 80 // can't be used across boundaries to classify tokens. | |
| 81 | |
| 82 // Sometimes we need lookahead and lookbehind and sometimes we want to handle | |
| 83 // embedded language -- JavaScript or CSS embedded in HTML, or inline assembly | |
| 84 // in C. | |
| 85 | |
| 86 // If a particular pattern has a numbered group, and its style pattern starts | |
| 87 // with "lang-" as in | |
| 88 // ['lang-js', /<script>(.*?)<\/script>/] | |
| 89 // then the token classification step breaks the token into pieces. | |
| 90 // Group 1 is re-parsed using the language handler for "lang-js", and the | |
| 91 // surrounding portions are reclassified using the current language handler. | |
| 92 // This mechanism gives us lookahead, lookbehind, and language embedding. | |
| 93 | |
| 94 // Shortcut Patterns | |
| 95 // ================= | |
| 96 // A shortcut pattern is one that is tried before other patterns if the first | |
| 97 // character in the token is in the string of characters. | |
| 98 // This very effectively lets us make quick correct decisions for common token | |
| 99 // types. | |
| 100 | |
| 101 // All other patterns are fall-through patterns. | |
| 102 | |
| 103 | |
| 104 | |
| 105 // The comments inline below refer to productions in the CSS specification's | |
| 106 // lexical grammar. See link above. | |
| 107 PR['registerLangHandler']( | |
| 108 PR['createSimpleLexer']( | |
| 109 // Shortcut patterns. | |
| 110 [ | |
| 111 // The space production <s> | |
| 112 [PR['PR_PLAIN'], /^[ \t\r\n\f]+/, null, ' \t\r\n\f'] | |
| 113 ], | |
| 114 // Fall-through patterns. | |
| 115 [ | |
| 116 // Quoted strings. <string1> and <string2> | |
| 117 [PR['PR_STRING'], | |
| 118 /^\"(?:[^\n\r\f\\\"]|\\(?:\r\n?|\n|\f)|\\[\s\S])*\"/, null], | |
| 119 [PR['PR_STRING'], | |
| 120 /^\'(?:[^\n\r\f\\\']|\\(?:\r\n?|\n|\f)|\\[\s\S])*\'/, null], | |
| 121 ['lang-css-str', /^url\(([^\)\"\']+)\)/i], | |
| 122 [PR['PR_KEYWORD'], | |
| 123 /^(?:url|rgb|\!important|@import|@page|@media|@charset|inherit)(?=[^\-
\w]|$)/i, | |
| 124 null], | |
| 125 // A property name -- an identifier followed by a colon. | |
| 126 ['lang-css-kw', /^(-?(?:[_a-z]|(?:\\[0-9a-f]+ ?))(?:[_a-z0-9\-]|\\(?:\\
[0-9a-f]+ ?))*)\s*:/i], | |
| 127 // A C style block comment. The <comment> production. | |
| 128 [PR['PR_COMMENT'], /^\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\//], | |
| 129 // Escaping text spans | |
| 130 [PR['PR_COMMENT'], /^(?:<!--|-->)/], | |
| 131 // A number possibly containing a suffix. | |
| 132 [PR['PR_LITERAL'], /^(?:\d+|\d*\.\d+)(?:%|[a-z]+)?/i], | |
| 133 // A hex color | |
| 134 [PR['PR_LITERAL'], /^#(?:[0-9a-f]{3}){1,2}\b/i], | |
| 135 // An identifier | |
| 136 [PR['PR_PLAIN'], | |
| 137 /^-?(?:[_a-z]|(?:\\[\da-f]+ ?))(?:[_a-z\d\-]|\\(?:\\[\da-f]+ ?))*/i], | |
| 138 // A run of punctuation | |
| 139 [PR['PR_PUNCTUATION'], /^[^\s\w\'\"]+/] | |
| 140 ]), | |
| 141 ['css']); | |
| 142 // Above we use embedded languages to highlight property names (identifiers | |
| 143 // followed by a colon) differently from identifiers in values. | |
| 144 PR['registerLangHandler']( | |
| 145 PR['createSimpleLexer']([], | |
| 146 [ | |
| 147 [PR['PR_KEYWORD'], | |
| 148 /^-?(?:[_a-z]|(?:\\[\da-f]+ ?))(?:[_a-z\d\-]|\\(?:\\[\da-f]+ ?))*/i] | |
| 149 ]), | |
| 150 ['css-kw']); | |
| 151 // The content of an unquoted URL literal like url(http://foo/img.png) should | |
| 152 // be colored as string content. The 'lang-css-str' URL pattern above passes | |
| 153 // its captured group to this handler, which styles it as PR_STRING. | |
| 154 PR['registerLangHandler']( | |
| 155 PR['createSimpleLexer']([], | |
| 156 [ | |
| 157 [PR['PR_STRING'], /^[^\)\"\']+/] | |
| 158 ]), | |
| 159 ['css-str']); | |
| OLD | NEW |