OLD | NEW |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python2.4 |
2 | 2 |
3 # This code is original from jsmin by Douglas Crockford, it was translated to | 3 # Copyright 2009 the V8 project authors. All rights reserved. |
4 # Python by Baruch Even. The original code had the following copyright and | 4 # Redistribution and use in source and binary forms, with or without |
5 # license. | 5 # modification, are permitted provided that the following conditions are |
6 # met: | |
6 # | 7 # |
7 # /* jsmin.c | 8 # * Redistributions of source code must retain the above copyright |
8 # 2007-05-22 | 9 # notice, this list of conditions and the following disclaimer. |
10 # * Redistributions in binary form must reproduce the above | |
11 # copyright notice, this list of conditions and the following | |
12 # disclaimer in the documentation and/or other materials provided | |
13 # with the distribution. | |
14 # * Neither the name of Google Inc. nor the names of its | |
15 # contributors may be used to endorse or promote products derived | |
16 # from this software without specific prior written permission. | |
9 # | 17 # |
10 # Copyright (c) 2002 Douglas Crockford (www.crockford.com) | 18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
11 # | 19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
12 # Permission is hereby granted, free of charge, to any person obtaining a copy of | 20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
13 # this software and associated documentation files (the "Software"), to deal in | 21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
14 # the Software without restriction, including without limitation the rights to | 22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
15 # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | 23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
16 # of the Software, and to permit persons to whom the Software is furnished to do | 24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
17 # so, subject to the following conditions: | 25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
18 # | 26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
19 # The above copyright notice and this permission notice shall be included in all | 27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
20 # copies or substantial portions of the Software. | 28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
21 # | 29 |
22 # The Software shall be used for Good, not Evil. | 30 # Suppress copyright warning: pylint: disable-msg=C6304 |
Christian Plesner Hansen
2009/09/23 12:19:35
I would suggest removing this and just letting pyl
| |
23 # | 31 |
24 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 32 """A JavaScript minifier. |
25 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 33 |
26 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 34 It is far from being a complete JS parser, so there are many valid |
27 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 35 JavaScript programs that will be ruined by it. Another strangeness is that |
28 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 36 it accepts $ and % as parts of identifiers. It doesn't merge lines or strip |
29 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | 37 out blank lines in order to ease debugging. Variables at the top scope are |
30 # SOFTWARE. | 38 properties of the global object so we can't rename them. It is assumed that |
31 # */ | 39 you introduce variables with var as if JavaScript followed C++ scope rules |
32 | 40 around curly braces, so the declaration must be above the first use. |
33 from StringIO import StringIO | 41 |
34 | 42 Use as: |
35 def jsmin(js): | 43 import jsmin |
36 ins = StringIO(js) | 44 minifier = jsmin.JavaScriptMinifier() |
37 outs = StringIO() | 45 program1 = minifier.JSMinify(program1) |
38 JavascriptMinify().minify(ins, outs) | 46 program2 = minifier.JSMinify(program2) |
39 str = outs.getvalue() | 47 """ |
40 if len(str) > 0 and str[0] == '\n': | 48 |
41 str = str[1:] | 49 import re |
42 return str | 50 |
43 | 51 |
44 def isAlphanum(c): | 52 class JavaScriptMinifier(object): |
45 """return true if the character is a letter, digit, underscore, | 53 """An object that you can feed code snippets to to get them minified.""" |
46 dollar sign, or non-ASCII character. | 54 |
47 """ | 55 def __init__(self): |
48 return ((c >= 'a' and c <= 'z') or (c >= '0' and c <= '9') or | 56 # We prepopulate the list of identifiers that shouldn't be used. These |
49 (c >= 'A' and c <= 'Z') or c == '_' or c == '$' or c == '\\' or (c i s not None and ord(c) > 126)); | 57 # short language keywords could otherwise be used by the script as variable |
50 | 58 # names. |
51 class UnterminatedComment(Exception): | 59 self.seen_identifiers = {"do": True, "in": True} |
52 pass | 60 self.identifier_counter = 0 |
53 | 61 self.in_comment = False |
54 class UnterminatedStringLiteral(Exception): | 62 self.map = {} |
55 pass | 63 self.nesting = 0 |
56 | 64 |
57 class UnterminatedRegularExpression(Exception): | 65 def LookAtIdentifier(self, m): |
58 pass | 66 """Records identifiers or keywords that we see in use.""" |
59 | 67 # (So we can avoid renaming variables to these strings.) |
60 class JavascriptMinify(object): | 68 identifier = m.group(1) |
61 | 69 self.seen_identifiers[identifier] = True |
62 def _outA(self): | 70 |
63 self.outstream.write(self.theA) | 71 def Push(self): |
64 def _outB(self): | 72 """Called when we encounter a '{'.""" |
65 self.outstream.write(self.theB) | 73 self.nesting += 1 |
66 | 74 |
67 def _get(self): | 75 def Pop(self): |
68 """return the next character from stdin. Watch out for lookahead. If | 76 """Called when we encounter a '}'.""" |
69 the character is a control character, translate it to a space or | 77 self.nesting -= 1 |
70 linefeed. | 78 # We treat each top-level opening brace as a single scope that can span |
71 """ | 79 # several sets of nested braces. |
72 c = self.theLookahead | 80 if self.nesting == 0: |
73 self.theLookahead = None | 81 self.map = {} |
74 if c == None: | 82 self.identifier_counter = 0 |
75 c = self.instream.read(1) | 83 |
76 if c >= ' ' or c == '\n': | 84 def Declaration(self, m): |
77 return c | 85 """Rewrites bits of the program selected by a regexp.""" |
78 if c == '': # EOF | 86 # These can be curly braces, literal strings, function declarations and var |
79 return '\000' | 87 # declarations. (These last two must be on one line including the opening |
80 if c == '\r': | 88 # curly brace of the function for their variables to be renamed). |
81 return '\n' | 89 matched_text = m.group(0) |
82 return ' ' | 90 if matched_text == "{": |
83 | 91 self.Push() |
84 def _peek(self): | 92 return matched_text |
85 self.theLookahead = self._get() | 93 if matched_text == "}": |
86 return self.theLookahead | 94 self.Pop() |
87 | 95 return matched_text |
88 def _next(self): | 96 if re.match("[\"'/]", matched_text): |
89 """get the next character, excluding comments. peek() is used to see | 97 return matched_text |
90 if an unescaped '/' is followed by a '/' or '*'. | 98 m = re.match(r"var ", matched_text) |
91 """ | 99 if m: |
92 c = self._get() | 100 var_names = matched_text[m.end():] |
93 if c == '/' and self.theA != '\\': | 101 var_names = re.split(r",", var_names) |
94 p = self._peek() | 102 return "var " + ",".join(map(self.FindNewName, var_names)) |
95 if p == '/': | 103 m = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text) |
96 c = self._get() | 104 if m: |
97 while c > '\n': | 105 up_to_args = m.group(1) |
98 c = self._get() | 106 args = m.group(2) |
99 return c | 107 args = re.split(r",", args) |
100 if p == '*': | 108 self.Push() |
101 c = self._get() | 109 return up_to_args + "(" + ",".join(map(self.FindNewName, args)) + "){" |
102 while 1: | 110 |
103 c = self._get() | 111 if matched_text in self.map: |
104 if c == '*': | 112 return self.map[matched_text] |
105 if self._peek() == '/': | 113 |
106 self._get() | 114 return matched_text |
107 return ' ' | 115 |
108 if c == '\000': | 116 def CharFromNumber(self, number): |
109 raise UnterminatedComment() | 117 """A single-digit base-52 encoding using a-zA-Z.""" |
110 | 118 if number < 26: |
111 return c | 119 return chr(number + 97) |
112 | 120 number -= 26 |
113 def _action(self, action): | 121 return chr(number + 65) |
114 """do something! What you do is determined by the argument: | 122 |
115 1 Output A. Copy B to A. Get the next B. | 123 def FindNewName(self, var_name): |
116 2 Copy B to A. Get the next B. (Delete A). | 124 """Finds a new 1-character or 2-character name for a variable.""" |
117 3 Get the next B. (Delete B). | 125 # Enters it into the mapping table for this scope. |
Christian Plesner Hansen
2009/09/23 12:19:35
Why is this not part of the docstring?
| |
118 action treats a string as a single character. Wow! | 126 new_identifier = "" |
119 action recognizes a regular expression if it is preceded by ( or , or =. | 127 # Variable names that end in _ are member variables of the global object, |
120 """ | 128 # so they can be visible from code in a different scope. We leave them |
121 if action <= 1: | 129 # alone. |
122 self._outA() | 130 if var_name in self.map: |
123 | 131 return self.map[var_name] |
124 if action <= 2: | 132 if self.nesting == 0: |
125 self.theA = self.theB | 133 return var_name |
126 if self.theA == "'" or self.theA == '"': | 134 while True: |
127 while 1: | 135 identifier_first_char = self.identifier_counter % 52 |
128 self._outA() | 136 identifier_second_char = self.identifier_counter / 52 |
129 self.theA = self._get() | 137 new_identifier = self.CharFromNumber(identifier_first_char) |
130 if self.theA == self.theB: | 138 if identifier_second_char != 0: |
131 break | 139 new_identifier = ( |
132 if self.theA <= '\n': | 140 self.CharFromNumber(identifier_second_char - 1) + new_identifier) |
133 raise UnterminatedStringLiteral() | 141 self.identifier_counter += 1 |
134 if self.theA == '\\': | 142 if not new_identifier in self.seen_identifiers: |
135 self._outA() | 143 break |
136 self.theA = self._get() | 144 |
137 | 145 self.map[var_name] = new_identifier |
138 | 146 return new_identifier |
139 if action <= 3: | 147 |
140 self.theB = self._next() | 148 def RemoveSpaces(self, m): |
141 if self.theB == '/' and (self.theA == '(' or self.theA == ',' or | 149 """Returns literal strings unchanged, replaces other inputs with group 2.""" |
142 self.theA == '=' or self.theA == ':' or | 150 # Other inputs are replaced with the contents of capture 1. This is either |
143 self.theA == '[' or self.theA == '?' or | 151 # a single space or an empty string. |
144 self.theA == '!' or self.theA == '&' or | 152 entire_match = m.group(0) |
145 self.theA == '|' or self.theA == ';' or | 153 replacement = m.group(1) |
146 self.theA == '{' or self.theA == '}' or | 154 if re.match(r"'.*'$", entire_match): |
147 self.theA == '\n'): | 155 return entire_match |
148 self._outA() | 156 if re.match(r'".*"$', entire_match): |
149 self._outB() | 157 return entire_match |
150 while 1: | 158 if re.match(r"/.+/$", entire_match): |
151 self.theA = self._get() | 159 return entire_match |
152 if self.theA == '/': | 160 return replacement |
153 break | 161 |
154 elif self.theA == '\\': | 162 def JSMinify(self, text): |
155 self._outA() | 163 """The main entry point. Takes a text and returns a compressed version.""" |
156 self.theA = self._get() | 164 # The compressed version hopefully does the same thing. Line breaks are |
157 elif self.theA <= '\n': | 165 # preserved. |
158 raise UnterminatedRegularExpression() | 166 new_lines = [] |
159 self._outA() | 167 for line in re.split(r"\n", text): |
160 self.theB = self._next() | 168 line = line.replace("\t", " ") |
161 | 169 if self.in_comment: |
162 | 170 m = re.search(r"\*/", line) |
163 def _jsmin(self): | 171 if m: |
164 """Copy the input to the output, deleting the characters which are | 172 line = line[m.end():] |
165 insignificant to JavaScript. Comments will be removed. Tabs will be | 173 self.in_comment = False |
166 replaced with spaces. Carriage returns will be replaced with linefeeds. | 174 else: |
167 Most spaces and linefeeds will be removed. | 175 new_lines.append("") |
168 """ | 176 continue |
169 self.theA = '\n' | 177 |
170 self._action(3) | 178 if not self.in_comment: |
171 | 179 line = re.sub(r"/\*.*?\*/", " ", line) |
172 while self.theA != '\000': | 180 line = re.sub(r"//.*", "", line) |
173 if self.theA == ' ': | 181 m = re.search(r"/\*", line) |
174 if isAlphanum(self.theB): | 182 if m: |
175 self._action(1) | 183 line = line[:m.start()] |
176 else: | 184 self.in_comment = True |
177 self._action(2) | 185 |
178 elif self.theA == '\n': | 186 # Strip leading and trailing spaces. |
179 if self.theB in ['{', '[', '(', '+', '-']: | 187 line = re.sub(r"^ +", "", line) |
180 self._action(1) | 188 line = re.sub(r" +$", "", line) |
181 elif self.theB == ' ': | 189 # A regexp that matches a literal string surrounded by "double quotes". |
182 self._action(3) | 190 # This regexp can handle embedded backslash-escaped characters including |
183 else: | 191 # embedded backslash-escaped double quotes. |
184 if isAlphanum(self.theB): | 192 double_quoted_string = r'"(?:[^"\\]|\\.)*"' |
185 self._action(1) | 193 # A regexp that matches a literal string surrounded by 'single quotes'. |
186 else: | 194 single_quoted_string = r"'(?:[^'\\]|\\.)*'" |
187 self._action(2) | 195 # A regexp that matches a regexp literal surrounded by /slashes/. |
188 else: | 196 slash_quoted_regexp = r"/(?:[^/\\]|\\.)+/" |
189 if self.theB == ' ': | 197 # Replace multiple spaces with a single space. |
190 if isAlphanum(self.theA): | 198 line = re.sub("|".join([double_quoted_string, |
191 self._action(1) | 199 single_quoted_string, |
192 else: | 200 slash_quoted_regexp, |
193 self._action(3) | 201 "( )+"]), |
194 elif self.theB == '\n': | 202 self.RemoveSpaces, |
195 if self.theA in ['}', ']', ')', '+', '-', '"', '\'']: | 203 line) |
196 self._action(1) | 204 # Strip single spaces unless they have an identifier character both before |
197 else: | 205 # and after the space. % and $ are counted as identifier characters. |
198 if isAlphanum(self.theA): | 206 line = re.sub("|".join([double_quoted_string, |
199 self._action(1) | 207 single_quoted_string, |
200 else: | 208 slash_quoted_regexp, |
201 self._action(3) | 209 r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()"]), |
202 else: | 210 self.RemoveSpaces, |
203 self._action(1) | 211 line) |
204 | 212 # Collect keywords and identifiers that are already in use. |
205 def minify(self, instream, outstream): | 213 if self.nesting == 0: |
206 self.instream = instream | 214 re.sub(r"([a-zA-Z0-9_$%]+)", self.LookAtIdentifier, line) |
207 self.outstream = outstream | 215 function_declaration_regexp = ( |
208 self.theA = '\n' | 216 r"\bfunction" # Function definition keyword... |
209 self.theB = None | 217 r"( [\w$%]+)?" # ...optional function name... |
210 self.theLookahead = None | 218 r"\([\w$%,]+\)\{") # ...argument declarations. |
211 | 219 # Unfortunately the keyword-value syntax { key:value } makes the key look |
212 self._jsmin() | 220 # like a variable where in fact it is a literal string. We use the |
213 self.instream.close() | 221 # presence or absence of a question mark to try to distinguish between |
214 | 222 # this case and the ternary operator: "condition ? iftrue : iffalse". |
215 if __name__ == '__main__': | 223 if re.search(r"\?", line): |
216 import sys | 224 block_trailing_colon = r"" |
217 jsm = JavascriptMinify() | 225 else: |
218 jsm.minify(sys.stdin, sys.stdout) | 226 block_trailing_colon = r"(?![:\w$%])" |
227 # Variable use. Cannot follow a period or precede a colon. | |
228 variable_use_regexp = r"(?<![.\w$%])[\w$%]+" + block_trailing_colon | |
229 line = re.sub("|".join([double_quoted_string, | |
230 single_quoted_string, | |
231 slash_quoted_regexp, | |
232 r"\{", # Curly braces. | |
233 r"\}", | |
234 r"\bvar [\w$%,]+", # var declarations. | |
235 function_declaration_regexp, | |
236 variable_use_regexp]), | |
237 self.Declaration, | |
238 line) | |
239 new_lines.append(line) | |
240 | |
241 return "\n".join(new_lines) + "\n" | |
OLD | NEW |