OLD | NEW |
| (Empty) |
1 // Copyright (C) 2009 Google Inc. | |
2 // | |
3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
4 // you may not use this file except in compliance with the License. | |
5 // You may obtain a copy of the License at | |
6 // | |
7 // http://www.apache.org/licenses/LICENSE-2.0 | |
8 // | |
9 // Unless required by applicable law or agreed to in writing, software | |
10 // distributed under the License is distributed on an "AS IS" BASIS, | |
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 // See the License for the specific language governing permissions and | |
13 // limitations under the License. | |
14 | |
15 | |
16 | |
17 /** | |
18 * @fileoverview | |
19 * Registers a language handler for CSS. | |
20 * | |
21 * | |
22 * To use, include prettify.js and this file in your HTML page. | |
23 * Then put your code in an HTML tag like | |
24 * <pre class="prettyprint lang-css"></pre> | |
25 * | |
26 * | |
27 * http://www.w3.org/TR/CSS21/grammar.html Section G2 defines the lexical | |
28 * grammar. This scheme does not recognize keywords containing escapes. | |
29 * | |
30 * @author mikesamuel@gmail.com | |
31 */ | |
32 | |
33 // This file is a call to a function defined in prettify.js which defines a | |
34 // lexical scanner for CSS and maps tokens to styles. | |
35 | |
36 // The call to PR['registerLangHandler'] is quoted so that Closure Compiler | |
37 // will not rename the call, so that language extensions can be | |
38 // compiled/minified separately from one another. Other symbols defined in | |
39 // prettify.js are similarly quoted. | |
40 | |
41 // The call is structured thus: | |
42 // PR['registerLangHandler']( | |
43 // PR['createSimpleLexer']( | |
44 // shortcutPatterns, | |
45 // fallThroughPatterns), | |
46 // [languageId0, ..., languageIdN]) | |
47 | |
48 // Language IDs | |
49 // ============= | |
50 // The language IDs are typically the file extensions of source files for | |
51 // that language so that users can syntax highlight arbitrary files based | |
52 // on just the extension. This is heuristic, but works pretty well in | |
53 // practice. | |
54 | |
55 // Patterns | |
56 // ======== | |
57 // Lexers are typically implemented as a set of regular expressions. | |
58 // The SimpleLexer function takes regular expressions, styles, and some | |
59 // pragma-info and produces a lexer. A token description looks like | |
60 // [STYLE_NAME, /regular-expression/, pragmas] | |
61 | |
62 // Initially, simple lexer's inner loop looked like: | |
63 | |
64 // while sourceCode is not empty: | |
65 // try each regular expression in order until one matches | |
66 // remove the matched portion from sourceCode | |
67 | |
68 // This was really slow for large files because some JS interpreters | |
69 // do a buffer copy on the matched portion which is O(n*n) | |
70 | |
71 // The current loop now looks like | |
72 | |
73 // 1. use js-modules/combinePrefixPatterns.js to | |
74 // combine all regular expressions into one | |
75 // 2. use a single global regular expression match to extract all tokens | |
76 // 3. for each token try regular expressions in order until one matches it | |
77 // and classify it using the associated style | |
78 | |
79 // This is a lot more efficient but it does mean that lookahead and lookbehind | |
80 // can't be used across boundaries to classify tokens. | |
81 | |
82 // Sometimes we need lookahead and lookbehind and sometimes we want to handle | |
83 // embedded languages -- JavaScript or CSS embedded in HTML, or inline assembly | |
84 // in C. | |
85 | |
86 // If a particular pattern has a numbered group, and its style pattern starts | |
87 // with "lang-" as in | |
88 // ['lang-js', /<script>(.*?)<\/script>/] | |
89 // then the token classification step breaks the token into pieces. | |
90 // Group 1 is re-parsed using the language handler for "lang-js", and the | |
91 // surrounding portions are reclassified using the current language handler. | |
92 // This mechanism gives us lookahead, lookbehind, and language embedding. | |
93 | |
94 // Shortcut Patterns | |
95 // ================= | |
96 // A shortcut pattern is one that is tried before other patterns if the first | |
97 // character in the token is in the string of characters. | |
98 // This very effectively lets us make quick correct decisions for common token | |
99 // types. | |
100 | |
101 // All other patterns are fall-through patterns. | |
102 | |
103 | |
104 | |
// Registers the lexer for the 'css' language ID.
//
// The comments inline below refer to productions in the CSS specification's
// lexical grammar (http://www.w3.org/TR/CSS21/grammar.html, section G2).
//
// Each entry has the form [style, regex, pragma, shortcutChars].  Entries
// whose style starts with "lang-" hand capture group 1 to the language
// handler registered under the rest of that name ('css-kw' and 'css-str').
PR['registerLangHandler'](
    PR['createSimpleLexer'](
        // Shortcut patterns.
        [
         // The space production <s>
         [PR['PR_PLAIN'],       /^[ \t\r\n\f]+/, null, ' \t\r\n\f']
        ],
        // Fall-through patterns.
        [
         // Quoted strings.  <string1> and <string2>
         [PR['PR_STRING'],
          /^\"(?:[^\n\r\f\\\"]|\\(?:\r\n?|\n|\f)|\\[\s\S])*\"/, null],
         [PR['PR_STRING'],
          /^\'(?:[^\n\r\f\\\']|\\(?:\r\n?|\n|\f)|\\[\s\S])*\'/, null],
         // An unquoted URL.  Group 1 (the text between the parentheses) is
         // re-lexed by the 'css-str' handler.
         ['lang-css-str', /^url\(([^\)\"\']+)\)/i],
         // A fixed set of keywords.  The lookahead keeps a keyword from
         // matching when it is only the prefix of a longer identifier.
         [PR['PR_KEYWORD'],
          /^(?:url|rgb|\!important|@import|@page|@media|@charset|inherit)(?=[^\-\w]|$)/i,
          null],
         // A property name -- an identifier followed by a colon.
         // Group 1 (the identifier) is re-lexed by the 'css-kw' handler.
         // A hex escape is a single backslash followed by hex digits and an
         // optional space, both in the first position and in later ones.
         ['lang-css-kw',
          /^(-?(?:[_a-z]|(?:\\[0-9a-f]+ ?))(?:[_a-z0-9\-]|\\[0-9a-f]+ ?)*)\s*:/i],
         // A C style block comment.  The <comment> production.
         [PR['PR_COMMENT'], /^\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\//],
         // Escaping text spans
         [PR['PR_COMMENT'], /^(?:<!--|-->)/],
         // A number possibly containing a suffix.
         [PR['PR_LITERAL'], /^(?:\d+|\d*\.\d+)(?:%|[a-z]+)?/i],
         // A hex color
         [PR['PR_LITERAL'], /^#(?:[0-9a-f]{3}){1,2}\b/i],
         // An identifier.  Hex escapes take the same single-backslash form
         // as in the property-name pattern above.
         [PR['PR_PLAIN'],
          /^-?(?:[_a-z]|(?:\\[\da-f]+ ?))(?:[_a-z\d\-]|\\[\da-f]+ ?)*/i],
         // A run of punctuation
         [PR['PR_PUNCTUATION'], /^[^\s\w\'\"]+/]
        ]),
    ['css']);
// Registers the lexer for the 'css-kw' language ID.
//
// The 'css' handler uses this embedded language to highlight property names
// (identifiers followed by a colon) differently from identifiers in values:
// every identifier seen here is styled as a keyword.
PR['registerLangHandler'](
    PR['createSimpleLexer']([],
        [
         // An identifier.  A hex escape is a single backslash followed by
         // hex digits and an optional space, in any position.
         [PR['PR_KEYWORD'],
          /^-?(?:[_a-z]|(?:\\[\da-f]+ ?))(?:[_a-z\d\-]|\\[\da-f]+ ?)*/i]
        ]),
    ['css-kw']);
// Registers the lexer for the 'css-str' language ID.
//
// The body of an unquoted URL literal such as url(http://foo/img.png) is
// passed to this handler so that it is colored as string content.
(function () {
  // Everything up to the first closing parenthesis or quote character is
  // one string token.
  var urlBodyRule = [PR['PR_STRING'], /^[^\)\"\']+/];

  // No shortcut patterns; a single fall-through pattern.
  var urlBodyLexer = PR['createSimpleLexer']([], [urlBodyRule]);

  PR['registerLangHandler'](urlBodyLexer, ['css-str']);
})();
OLD | NEW |