OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # -*- coding: ascii -*- | |
3 # | |
4 # Copyright 2011 - 2013 | |
5 # Andr\xe9 Malo or his licensors, as applicable | |
6 # | |
7 # Licensed under the Apache License, Version 2.0 (the "License"); | |
8 # you may not use this file except in compliance with the License. | |
9 # You may obtain a copy of the License at | |
10 # | |
11 # http://www.apache.org/licenses/LICENSE-2.0 | |
12 # | |
13 # Unless required by applicable law or agreed to in writing, software | |
14 # distributed under the License is distributed on an "AS IS" BASIS, | |
15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
16 # See the License for the specific language governing permissions and | |
17 # limitations under the License. | |
18 r""" | |
19 ===================== | |
20 Javascript Minifier | |
21 ===================== | |
22 | |
23 rJSmin is a javascript minifier written in python. | |
24 | |
25 The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\. | |
26 | |
27 The module is a re-implementation aiming for speed, so it can be used at | |
28 runtime (rather than during a preprocessing step). Usually it produces the | |
29 same results as the original ``jsmin.c``. It differs in the following ways: | |
30 | |
31 - there is no error detection: unterminated string, regex and comment | |
32 literals are treated as regular javascript code and minified as such. | |
33 - Control characters inside string and regex literals are left untouched; they | |
34 are not converted to spaces (nor to \n) | |
35 - Newline characters are not allowed inside string and regex literals, except | |
36 for line continuations in string literals (ECMA-5). | |
37 - "return /regex/" is recognized correctly. | |
38 - "+ +" and "- -" sequences are not collapsed to '++' or '--' | |
39 - Newlines before ! operators are removed more sensibly | |
40 - rJSmin does not handle streams, but only complete strings. (However, the | |
41 module provides a "streamy" interface). | |
42 | |
43 Since most parts of the logic are handled by the regex engine it's way | |
44 faster than the original python port of ``jsmin.c`` by Baruch Even. The speed | |
45 factor varies between about 6 and 55 depending on input and python version | |
46 (it gets faster the more compressed the input already is). Compared to the | |
47 speed-refactored python port by Dave St.Germain the performance gain is less | |
48 dramatic but still between 1.2 and 7. See the docs/BENCHMARKS file for | |
49 details. | |
50 | |
51 rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more. | |
52 | |
53 Both python 2 and python 3 are supported. | |
54 | |
55 .. _jsmin.c by Douglas Crockford: | |
56 http://www.crockford.com/javascript/jsmin.c | |
57 """ | |
# Author name; the source file stays pure ASCII by using the \xe9 escape.
__author__ = "Andr\xe9 Malo"
# A python 2 byte string has a ``decode`` method and is turned into a unicode
# object here (latin-1). A python 3 ``str`` has no ``decode`` attribute, so the
# fallback lambda is called instead and hands back the name unchanged.
__author__ = getattr(__author__, 'decode', lambda x: __author__)('latin-1')
__docformat__ = "restructuredtext en"
__license__ = "Apache License, Version 2.0"
__version__ = '1.0.7'
# Only ``jsmin`` is listed as public API for ``from ... import *``.
__all__ = ['jsmin']
64 | |
65 import re as _re | |
66 | |
67 | |
68 def _make_jsmin(python_only=False): | |
69 """ | |
70 Generate JS minifier based on `jsmin.c by Douglas Crockford`_ | |
71 | |
72 .. _jsmin.c by Douglas Crockford: | |
73 http://www.crockford.com/javascript/jsmin.c | |
74 | |
75 :Parameters: | |
76 `python_only` : ``bool`` | |
77 Use only the python variant. If true, the c extension is not even | |
78 tried to be loaded. | |
79 | |
80 :Return: Minifier | |
81 :Rtype: ``callable`` | |
82 """ | |
83 # pylint: disable = R0912, R0914, W0612 | |
84 if not python_only: | |
85 try: | |
86 import _rjsmin | |
87 except ImportError: | |
88 pass | |
89 else: | |
90 return _rjsmin.jsmin | |
91 try: | |
92 xrange | |
93 except NameError: | |
94 xrange = range # pylint: disable = W0622 | |
95 | |
96 space_chars = r'[\000-\011\013\014\016-\040]' | |
97 | |
98 line_comment = r'(?://[^\r\n]*)' | |
99 space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)' | |
100 string1 = \ | |
101 r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)' | |
102 string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")' | |
103 strings = r'(?:%s|%s)' % (string1, string2) | |
104 | |
105 charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])' | |
106 nospecial = r'[^/\\\[\r\n]' | |
107 regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % ( | |
108 nospecial, charclass, nospecial | |
109 ) | |
110 space = r'(?:%s|%s)' % (space_chars, space_comment) | |
111 newline = r'(?:%s?[\r\n])' % line_comment | |
112 | |
113 def fix_charclass(result): | |
114 """ Fixup string of chars to fit into a regex char class """ | |
115 pos = result.find('-') | |
116 if pos >= 0: | |
117 result = r'%s%s-' % (result[:pos], result[pos + 1:]) | |
118 | |
119 def sequentize(string): | |
120 """ | |
121 Notate consecutive characters as sequence | |
122 | |
123 (1-4 instead of 1234) | |
124 """ | |
125 first, last, result = None, None, [] | |
126 for char in map(ord, string): | |
127 if last is None: | |
128 first = last = char | |
129 elif last + 1 == char: | |
130 last = char | |
131 else: | |
132 result.append((first, last)) | |
133 first = last = char | |
134 if last is not None: | |
135 result.append((first, last)) | |
136 return ''.join(['%s%s%s' % ( | |
137 chr(first), | |
138 last > first + 1 and '-' or '', | |
139 last != first and chr(last) or '' | |
140 ) for first, last in result]) | |
141 | |
142 return _re.sub(r'([\000-\040\047])', # for better portability | |
143 lambda m: '\\%03o' % ord(m.group(1)), (sequentize(result) | |
144 .replace('\\', '\\\\') | |
145 .replace('[', '\\[') | |
146 .replace(']', '\\]') | |
147 ) | |
148 ) | |
149 | |
150 def id_literal_(what): | |
151 """ Make id_literal like char class """ | |
152 match = _re.compile(what).match | |
153 result = ''.join([ | |
154 chr(c) for c in xrange(127) if not match(chr(c)) | |
155 ]) | |
156 return '[^%s]' % fix_charclass(result) | |
157 | |
158 def not_id_literal_(keep): | |
159 """ Make negated id_literal like char class """ | |
160 match = _re.compile(id_literal_(keep)).match | |
161 result = ''.join([ | |
162 chr(c) for c in xrange(127) if not match(chr(c)) | |
163 ]) | |
164 return r'[%s]' % fix_charclass(result) | |
165 | |
166 not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]') | |
167 preregex1 = r'[(,=:\[!&|?{};\r\n]' | |
168 preregex2 = r'%(not_id_literal)sreturn' % locals() | |
169 | |
170 id_literal = id_literal_(r'[a-zA-Z0-9_$]') | |
171 id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]') | |
172 id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]') | |
173 | |
174 dull = r'[^\047"/\000-\040]' | |
175 | |
176 space_sub = _re.compile(( | |
177 r'(%(dull)s+)' | |
178 r'|(%(strings)s%(dull)s*)' | |
179 r'|(?<=%(preregex1)s)' | |
180 r'%(space)s*(?:%(newline)s%(space)s*)*' | |
181 r'(%(regex)s%(dull)s*)' | |
182 r'|(?<=%(preregex2)s)' | |
183 r'%(space)s*(?:%(newline)s%(space)s)*' | |
184 r'(%(regex)s%(dull)s*)' | |
185 r'|(?<=%(id_literal_close)s)' | |
186 r'%(space)s*(?:(%(newline)s)%(space)s*)+' | |
187 r'(?=%(id_literal_open)s)' | |
188 r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)' | |
189 r'|(?<=\+)(%(space)s)+(?=\+)' | |
190 r'|(?<=-)(%(space)s)+(?=-)' | |
191 r'|%(space)s+' | |
192 r'|(?:%(newline)s%(space)s*)+' | |
193 ) % locals()).sub | |
194 #print space_sub.__self__.pattern | |
195 | |
196 def space_subber(match): | |
197 """ Substitution callback """ | |
198 # pylint: disable = C0321, R0911 | |
199 groups = match.groups() | |
200 if groups[0]: return groups[0] | |
201 elif groups[1]: return groups[1] | |
202 elif groups[2]: return groups[2] | |
203 elif groups[3]: return groups[3] | |
204 elif groups[4]: return '\n' | |
205 elif groups[5] or groups[6] or groups[7]: return ' ' | |
206 else: return '' | |
207 | |
208 def jsmin(script): # pylint: disable = W0621 | |
209 r""" | |
210 Minify javascript based on `jsmin.c by Douglas Crockford`_\. | |
211 | |
212 Instead of parsing the stream char by char, it uses a regular | |
213 expression approach which minifies the whole script with one big | |
214 substitution regex. | |
215 | |
216 .. _jsmin.c by Douglas Crockford: | |
217 http://www.crockford.com/javascript/jsmin.c | |
218 | |
219 :Parameters: | |
220 `script` : ``str`` | |
221 Script to minify | |
222 | |
223 :Return: Minified script | |
224 :Rtype: ``str`` | |
225 """ | |
226 return space_sub(space_subber, '\n%s\n' % script).strip() | |
227 | |
228 return jsmin | |
229 | |
# Public minifier, built once at import time. With the default arguments
# _make_jsmin() first tries to return the ``_rjsmin`` extension's function and
# only builds the regex based python implementation if that import fails.
jsmin = _make_jsmin()
231 | |
232 | |
def jsmin_for_posers(script):
    r"""
    Minify javascript based on `jsmin.c by Douglas Crockford`_\.

    Instead of parsing the stream char by char, it uses a regular
    expression approach which minifies the whole script with one big
    substitution regex.

    .. _jsmin.c by Douglas Crockford:
        http://www.crockford.com/javascript/jsmin.c

    :Warning: This function is the digest of a _make_jsmin() call. It just
              utilizes the resulting regex. It's just for fun here and may
              vanish any time. Use the `jsmin` function instead.

    :Parameters:
      `script` : ``str``
        Script to minify

    :Return: Minified script
    :Rtype: ``str``
    """
    def subber(match):
        """ Substitution callback """
        groups = match.groups()
        return (
            # groups 1-4: dull chars, string and regex literals are kept
            groups[0] or
            groups[1] or
            groups[2] or
            groups[3] or
            # group 5: a significant newline collapses to one newline
            (groups[4] and '\n') or
            # groups 6-8: a significant space collapses to one space
            (groups[5] and ' ') or
            (groups[6] and ' ') or
            (groups[7] and ' ') or
            # everything else matched is removed
            ''
        )

    # In the "regex after return" alternative each newline may be followed
    # by any number of whitespace/comment tokens ("...*/))*)*("), the same
    # as in the alternative before it. With exactly one token required, an
    # indented regex literal on the line after "return" is not recognized.
    return _re.sub(
        r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?'
        r'\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|'
        r'\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?{};\r\n])(?'
        r':[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*'
        r'(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*'
        r'[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:('
        r'?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\['
        r'\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[\000-#%-,./:-@\[-^`{-~-]return'
        r')(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/'
        r'))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:'
        r'/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*'
        r'(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/'
        r'\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\^`{|'
        r'~])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)'
        r'*/))*(?:((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]'
        r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,./'
        r':-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\013\01'
        r'4\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./:'
        r'-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*'
        r'\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013\014\016-'
        r'\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:[\000-\011\013'
        r'\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))+|(?:(?:(?://[^'
        r'\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^'
        r'/*][^*]*\*+)*/))*)+', subber, '\n%s\n' % script
    ).strip()
296 | |
297 | |
if __name__ == '__main__':
    # Run as a script: read the whole of stdin, minify it and write the
    # result to stdout.
    import sys as _sys
    _sys.stdout.write(jsmin(_sys.stdin.read()))
OLD | NEW |