Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(80)

Side by Side Diff: tools/jsmin.py

Issue 215052: * Remove non-Open Source code from Douglas Crockford.... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 11 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/js2c.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/python 1 #!/usr/bin/python2.4
2 2
3 # This code is original from jsmin by Douglas Crockford, it was translated to 3 # Copyright 2009 the V8 project authors. All rights reserved.
4 # Python by Baruch Even. The original code had the following copyright and 4 # Redistribution and use in source and binary forms, with or without
5 # license. 5 # modification, are permitted provided that the following conditions are
6 # met:
6 # 7 #
7 # /* jsmin.c 8 # * Redistributions of source code must retain the above copyright
8 # 2007-05-22 9 # notice, this list of conditions and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following
12 # disclaimer in the documentation and/or other materials provided
13 # with the distribution.
14 # * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived
16 # from this software without specific prior written permission.
9 # 17 #
10 # Copyright (c) 2002 Douglas Crockford (www.crockford.com) 18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
11 # 19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
12 # Permission is hereby granted, free of charge, to any person obtaining a copy of 20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
13 # this software and associated documentation files (the "Software"), to deal in 21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
14 # the Software without restriction, including without limitation the rights to 22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
15 # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
16 # of the Software, and to permit persons to whom the Software is furnished to do 24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
17 # so, subject to the following conditions: 25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
18 # 26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
19 # The above copyright notice and this permission notice shall be included in all 27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
20 # copies or substantial portions of the Software. 28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
21 # 29
22 # The Software shall be used for Good, not Evil. 30 # Suppress copyright warning: pylint: disable-msg=C6304
Christian Plesner Hansen 2009/09/23 12:19:35 I would suggest removing this and just letting pyl
23 # 31
24 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 32 """A JavaScript minifier.
25 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 33
26 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 34 It is far from being a complete JS parser, so there are many valid
27 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 35 JavaScript programs that will be ruined by it. Another strangeness is that
28 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 36 it accepts $ and % as parts of identifiers. It doesn't merge lines or strip
29 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 37 out blank lines in order to ease debugging. Variables at the top scope are
30 # SOFTWARE. 38 properties of the global object so we can't rename them. It is assumed that
31 # */ 39 you introduce variables with var as if JavaScript followed C++ scope rules
32 40 around curly braces, so the declaration must be above the first use.
33 from StringIO import StringIO 41
34 42 Use as:
35 def jsmin(js): 43 import jsmin
36 ins = StringIO(js) 44 minifier = JavaScriptMinifier()
37 outs = StringIO() 45 program1 = minifier.JSMinify(program1)
38 JavascriptMinify().minify(ins, outs) 46 program2 = minifier.JSMinify(program2)
39 str = outs.getvalue() 47 """
40 if len(str) > 0 and str[0] == '\n': 48
41 str = str[1:] 49 import re
42 return str 50
43 51
44 def isAlphanum(c): 52 class JavaScriptMinifier(object):
45 """return true if the character is a letter, digit, underscore, 53 """An object that you can feed code snippets to to get them minified."""
46 dollar sign, or non-ASCII character. 54
47 """ 55 def __init__(self):
48 return ((c >= 'a' and c <= 'z') or (c >= '0' and c <= '9') or 56 # We prepopulate the list of identifiers that shouldn't be used. These
49 (c >= 'A' and c <= 'Z') or c == '_' or c == '$' or c == '\\' or (c is not None and ord(c) > 126)); 57 # short language keywords could otherwise be used by the script as variable
50 58 # names.
51 class UnterminatedComment(Exception): 59 self.seen_identifiers = {"do": True, "in": True}
52 pass 60 self.identifier_counter = 0
53 61 self.in_comment = False
54 class UnterminatedStringLiteral(Exception): 62 self.map = {}
55 pass 63 self.nesting = 0
56 64
57 class UnterminatedRegularExpression(Exception): 65 def LookAtIdentifier(self, m):
58 pass 66 """Records identifiers or keywords that we see in use."""
59 67 # (So we can avoid renaming variables to these strings.)
60 class JavascriptMinify(object): 68 identifier = m.group(1)
61 69 self.seen_identifiers[identifier] = True
62 def _outA(self): 70
63 self.outstream.write(self.theA) 71 def Push(self):
64 def _outB(self): 72 """Called when we encounter a '{'."""
65 self.outstream.write(self.theB) 73 self.nesting += 1
66 74
67 def _get(self): 75 def Pop(self):
68 """return the next character from stdin. Watch out for lookahead. If 76 """Called when we encounter a '}'."""
69 the character is a control character, translate it to a space or 77 self.nesting -= 1
70 linefeed. 78 # We treat each top-level opening brace as a single scope that can span
71 """ 79 # several sets of nested braces.
72 c = self.theLookahead 80 if self.nesting == 0:
73 self.theLookahead = None 81 self.map = {}
74 if c == None: 82 self.identifier_counter = 0
75 c = self.instream.read(1) 83
76 if c >= ' ' or c == '\n': 84 def Declaration(self, m):
77 return c 85 """Rewrites bits of the program selected by a regexp."""
78 if c == '': # EOF 86 # These can be curly braces, literal strings, function declarations and var
79 return '\000' 87 # declarations. (These last two must be on one line including the opening
80 if c == '\r': 88 # curly brace of the function for their variables to be renamed).
81 return '\n' 89 matched_text = m.group(0)
82 return ' ' 90 if matched_text == "{":
83 91 self.Push()
84 def _peek(self): 92 return matched_text
85 self.theLookahead = self._get() 93 if matched_text == "}":
86 return self.theLookahead 94 self.Pop()
87 95 return matched_text
88 def _next(self): 96 if re.match("[\"'/]", matched_text):
89 """get the next character, excluding comments. peek() is used to see 97 return matched_text
90 if an unescaped '/' is followed by a '/' or '*'. 98 m = re.match(r"var ", matched_text)
91 """ 99 if m:
92 c = self._get() 100 var_names = matched_text[m.end():]
93 if c == '/' and self.theA != '\\': 101 var_names = re.split(r",", var_names)
94 p = self._peek() 102 return "var " + ",".join(map(self.FindNewName, var_names))
95 if p == '/': 103 m = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text)
96 c = self._get() 104 if m:
97 while c > '\n': 105 up_to_args = m.group(1)
98 c = self._get() 106 args = m.group(2)
99 return c 107 args = re.split(r",", args)
100 if p == '*': 108 self.Push()
101 c = self._get() 109 return up_to_args + "(" + ",".join(map(self.FindNewName, args)) + "){"
102 while 1: 110
103 c = self._get() 111 if matched_text in self.map:
104 if c == '*': 112 return self.map[matched_text]
105 if self._peek() == '/': 113
106 self._get() 114 return matched_text
107 return ' ' 115
108 if c == '\000': 116 def CharFromNumber(self, number):
109 raise UnterminatedComment() 117 """A single-digit base-52 encoding using a-zA-Z."""
110 118 if number < 26:
111 return c 119 return chr(number + 97)
112 120 number -= 26
113 def _action(self, action): 121 return chr(number + 65)
114 """do something! What you do is determined by the argument: 122
115 1 Output A. Copy B to A. Get the next B. 123 def FindNewName(self, var_name):
116 2 Copy B to A. Get the next B. (Delete A). 124 """Finds a new 1-character or 2-character name for a variable."""
117 3 Get the next B. (Delete B). 125 # Enters it into the mapping table for this scope.
Christian Plesner Hansen 2009/09/23 12:19:35 Why is this not part of the docstring?
118 action treats a string as a single character. Wow! 126 new_identifier = ""
119 action recognizes a regular expression if it is preceded by ( or , or =. 127 # Variable names that end in _ are member variables of the global object,
120 """ 128 # so they can be visible from code in a different scope. We leave them
121 if action <= 1: 129 # alone.
122 self._outA() 130 if var_name in self.map:
123 131 return self.map[var_name]
124 if action <= 2: 132 if self.nesting == 0:
125 self.theA = self.theB 133 return var_name
126 if self.theA == "'" or self.theA == '"': 134 while True:
127 while 1: 135 identifier_first_char = self.identifier_counter % 52
128 self._outA() 136 identifier_second_char = self.identifier_counter / 52
129 self.theA = self._get() 137 new_identifier = self.CharFromNumber(identifier_first_char)
130 if self.theA == self.theB: 138 if identifier_second_char != 0:
131 break 139 new_identifier = (
132 if self.theA <= '\n': 140 self.CharFromNumber(identifier_second_char - 1) + new_identifier)
133 raise UnterminatedStringLiteral() 141 self.identifier_counter += 1
134 if self.theA == '\\': 142 if not new_identifier in self.seen_identifiers:
135 self._outA() 143 break
136 self.theA = self._get() 144
137 145 self.map[var_name] = new_identifier
138 146 return new_identifier
139 if action <= 3: 147
140 self.theB = self._next() 148 def RemoveSpaces(self, m):
141 if self.theB == '/' and (self.theA == '(' or self.theA == ',' or 149 """Returns literal strings unchanged, replaces other inputs with group 1."""
142 self.theA == '=' or self.theA == ':' or 150 # Other inputs are replaced with the contents of capture 1. This is either
143 self.theA == '[' or self.theA == '?' or 151 # a single space or an empty string.
144 self.theA == '!' or self.theA == '&' or 152 entire_match = m.group(0)
145 self.theA == '|' or self.theA == ';' or 153 replacement = m.group(1)
146 self.theA == '{' or self.theA == '}' or 154 if re.match(r"'.*'$", entire_match):
147 self.theA == '\n'): 155 return entire_match
148 self._outA() 156 if re.match(r'".*"$', entire_match):
149 self._outB() 157 return entire_match
150 while 1: 158 if re.match(r"/.+/$", entire_match):
151 self.theA = self._get() 159 return entire_match
152 if self.theA == '/': 160 return replacement
153 break 161
154 elif self.theA == '\\': 162 def JSMinify(self, text):
155 self._outA() 163 """The main entry point. Takes a text and returns a compressed version."""
156 self.theA = self._get() 164 # The compressed version hopefully does the same thing. Line breaks are
157 elif self.theA <= '\n': 165 # preserved.
158 raise UnterminatedRegularExpression() 166 new_lines = []
159 self._outA() 167 for line in re.split(r"\n", text):
160 self.theB = self._next() 168 line = line.replace("\t", " ")
161 169 if self.in_comment:
162 170 m = re.search(r"\*/", line)
163 def _jsmin(self): 171 if m:
164 """Copy the input to the output, deleting the characters which are 172 line = line[m.end():]
165 insignificant to JavaScript. Comments will be removed. Tabs will be 173 self.in_comment = False
166 replaced with spaces. Carriage returns will be replaced with linefeeds. 174 else:
167 Most spaces and linefeeds will be removed. 175 new_lines.append("")
168 """ 176 continue
169 self.theA = '\n' 177
170 self._action(3) 178 if not self.in_comment:
171 179 line = re.sub(r"/\*.*?\*/", " ", line)
172 while self.theA != '\000': 180 line = re.sub(r"//.*", "", line)
173 if self.theA == ' ': 181 m = re.search(r"/\*", line)
174 if isAlphanum(self.theB): 182 if m:
175 self._action(1) 183 line = line[:m.start()]
176 else: 184 self.in_comment = True
177 self._action(2) 185
178 elif self.theA == '\n': 186 # Strip leading and trailing spaces.
179 if self.theB in ['{', '[', '(', '+', '-']: 187 line = re.sub(r"^ +", "", line)
180 self._action(1) 188 line = re.sub(r" +$", "", line)
181 elif self.theB == ' ': 189 # A regexp that matches a literal string surrounded by "double quotes".
182 self._action(3) 190 # This regexp can handle embedded backslash-escaped characters including
183 else: 191 # embedded backslash-escaped double quotes.
184 if isAlphanum(self.theB): 192 double_quoted_string = r'"(?:[^"\\]|\\.)*"'
185 self._action(1) 193 # A regexp that matches a literal string surrounded by 'double quotes'.
186 else: 194 single_quoted_string = r"'(?:[^'\\]|\\.)*'"
187 self._action(2) 195 # A regexp that matches a regexp literal surrounded by /slashes/.
188 else: 196 slash_quoted_regexp = r"/(?:[^/\\]|\\.)+/"
189 if self.theB == ' ': 197 # Replace multiple spaces with a single space.
190 if isAlphanum(self.theA): 198 line = re.sub("|".join([double_quoted_string,
191 self._action(1) 199 single_quoted_string,
192 else: 200 slash_quoted_regexp,
193 self._action(3) 201 "( )+"]),
194 elif self.theB == '\n': 202 self.RemoveSpaces,
195 if self.theA in ['}', ']', ')', '+', '-', '"', '\'']: 203 line)
196 self._action(1) 204 # Strip single spaces unless they have an identifier character both before
197 else: 205 # and after the space. % and $ are counted as identifier characters.
198 if isAlphanum(self.theA): 206 line = re.sub("|".join([double_quoted_string,
199 self._action(1) 207 single_quoted_string,
200 else: 208 slash_quoted_regexp,
201 self._action(3) 209 r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()"]),
202 else: 210 self.RemoveSpaces,
203 self._action(1) 211 line)
204 212 # Collect keywords and identifiers that are already in use.
205 def minify(self, instream, outstream): 213 if self.nesting == 0:
206 self.instream = instream 214 re.sub(r"([a-zA-Z0-9_$%]+)", self.LookAtIdentifier, line)
207 self.outstream = outstream 215 function_declaration_regexp = (
208 self.theA = '\n' 216 r"\bfunction" # Function definition keyword...
209 self.theB = None 217 r"( [\w$%]+)?" # ...optional function name...
210 self.theLookahead = None 218 r"\([\w$%,]+\)\{") # ...argument declarations.
211 219 # Unfortunately the keyword-value syntax { key:value } makes the key look
212 self._jsmin() 220 # like a variable where in fact it is a literal string. We use the
213 self.instream.close() 221 # presence or absence of a question mark to try to distinguish between
214 222 # this case and the ternary operator: "condition ? iftrue : iffalse".
215 if __name__ == '__main__': 223 if re.search(r"\?", line):
216 import sys 224 block_trailing_colon = r""
217 jsm = JavascriptMinify() 225 else:
218 jsm.minify(sys.stdin, sys.stdout) 226 block_trailing_colon = r"(?![:\w$%])"
227 # Variable use. Cannot follow a period or precede a colon.
228 variable_use_regexp = r"(?<![.\w$%])[\w$%]+" + block_trailing_colon
229 line = re.sub("|".join([double_quoted_string,
230 single_quoted_string,
231 slash_quoted_regexp,
232 r"\{", # Curly braces.
233 r"\}",
234 r"\bvar [\w$%,]+", # var declarations.
235 function_declaration_regexp,
236 variable_use_regexp]),
237 self.Declaration,
238 line)
239 new_lines.append(line)
240
241 return "\n".join(new_lines) + "\n"
OLDNEW
« no previous file with comments | « tools/js2c.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698