Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/python | 1 #!/usr/bin/python2.4 |
| 2 | 2 |
| 3 # This code is original from jsmin by Douglas Crockford, it was translated to | 3 # Copyright 2009 the V8 project authors. All rights reserved. |
| 4 # Python by Baruch Even. The original code had the following copyright and | 4 # Redistribution and use in source and binary forms, with or without |
| 5 # license. | 5 # modification, are permitted provided that the following conditions are |
| 6 # met: | |
| 6 # | 7 # |
| 7 # /* jsmin.c | 8 # * Redistributions of source code must retain the above copyright |
| 8 # 2007-05-22 | 9 # notice, this list of conditions and the following disclaimer. |
| 10 # * Redistributions in binary form must reproduce the above | |
| 11 # copyright notice, this list of conditions and the following | |
| 12 # disclaimer in the documentation and/or other materials provided | |
| 13 # with the distribution. | |
| 14 # * Neither the name of Google Inc. nor the names of its | |
| 15 # contributors may be used to endorse or promote products derived | |
| 16 # from this software without specific prior written permission. | |
| 9 # | 17 # |
| 10 # Copyright (c) 2002 Douglas Crockford (www.crockford.com) | 18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 11 # | 19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 12 # Permission is hereby granted, free of charge, to any person obtaining a copy of | 20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 13 # this software and associated documentation files (the "Software"), to deal in | 21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 14 # the Software without restriction, including without limitation the rights to | 22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 15 # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | 23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 16 # of the Software, and to permit persons to whom the Software is furnished to do | 24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 17 # so, subject to the following conditions: | 25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 18 # | 26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 19 # The above copyright notice and this permission notice shall be included in all | 27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 20 # copies or substantial portions of the Software. | 28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 21 # | 29 |
| 22 # The Software shall be used for Good, not Evil. | 30 # Suppress copyright warning: pylint: disable-msg=C6304 |
|
Christian Plesner Hansen
2009/09/23 12:19:35
I would suggest removing this and just letting pyl
| |
| 23 # | 31 |
| 24 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 32 """A JavaScript minifier. |
| 25 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 33 |
| 26 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 34 It is far from being a complete JS parser, so there are many valid |
| 27 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 35 JavaScript programs that will be ruined by it. Another strangeness is that |
| 28 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 36 it accepts $ and % as parts of identifiers. It doesn't merge lines or strip |
| 29 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | 37 out blank lines in order to ease debugging. Variables at the top scope are |
| 30 # SOFTWARE. | 38 properties of the global object so we can't rename them. It is assumed that |
| 31 # */ | 39 you introduce variables with var as if JavaScript followed C++ scope rules |
| 32 | 40 around curly braces, so the declaration must be above the first use. |
| 33 from StringIO import StringIO | 41 |
| 34 | 42 Use as: |
| 35 def jsmin(js): | 43 import jsmin |
| 36 ins = StringIO(js) | 44 minifier = jsmin.JavaScriptMinifier() |
| 37 outs = StringIO() | 45 program1 = minifier.JSMinify(program1) |
| 38 JavascriptMinify().minify(ins, outs) | 46 program2 = minifier.JSMinify(program2) |
| 39 str = outs.getvalue() | 47 """ |
| 40 if len(str) > 0 and str[0] == '\n': | 48 |
| 41 str = str[1:] | 49 import re |
| 42 return str | 50 |
| 43 | 51 |
| 44 def isAlphanum(c): | 52 class JavaScriptMinifier(object): |
| 45 """return true if the character is a letter, digit, underscore, | 53 """An object that you can feed code snippets to to get them minified.""" |
| 46 dollar sign, or non-ASCII character. | 54 |
| 47 """ | 55 def __init__(self): |
| 48 return ((c >= 'a' and c <= 'z') or (c >= '0' and c <= '9') or | 56 # We prepopulate the list of identifiers that shouldn't be used. These |
| 49 (c >= 'A' and c <= 'Z') or c == '_' or c == '$' or c == '\\' or (c is not None and ord(c) > 126)); | 57 # short language keywords could otherwise be used by the script as variable |
| 50 | 58 # names. |
| 51 class UnterminatedComment(Exception): | 59 self.seen_identifiers = {"do": True, "in": True} |
| 52 pass | 60 self.identifier_counter = 0 |
| 53 | 61 self.in_comment = False |
| 54 class UnterminatedStringLiteral(Exception): | 62 self.map = {} |
| 55 pass | 63 self.nesting = 0 |
| 56 | 64 |
| 57 class UnterminatedRegularExpression(Exception): | 65 def LookAtIdentifier(self, m): |
| 58 pass | 66 """Records identifiers or keywords that we see in use.""" |
| 59 | 67 # (So we can avoid renaming variables to these strings.) |
| 60 class JavascriptMinify(object): | 68 identifier = m.group(1) |
| 61 | 69 self.seen_identifiers[identifier] = True |
| 62 def _outA(self): | 70 |
| 63 self.outstream.write(self.theA) | 71 def Push(self): |
| 64 def _outB(self): | 72 """Called when we encounter a '{'.""" |
| 65 self.outstream.write(self.theB) | 73 self.nesting += 1 |
| 66 | 74 |
| 67 def _get(self): | 75 def Pop(self): |
| 68 """return the next character from stdin. Watch out for lookahead. If | 76 """Called when we encounter a '}'.""" |
| 69 the character is a control character, translate it to a space or | 77 self.nesting -= 1 |
| 70 linefeed. | 78 # We treat each top-level opening brace as a single scope that can span |
| 71 """ | 79 # several sets of nested braces. |
| 72 c = self.theLookahead | 80 if self.nesting == 0: |
| 73 self.theLookahead = None | 81 self.map = {} |
| 74 if c == None: | 82 self.identifier_counter = 0 |
| 75 c = self.instream.read(1) | 83 |
| 76 if c >= ' ' or c == '\n': | 84 def Declaration(self, m): |
| 77 return c | 85 """Rewrites bits of the program selected by a regexp.""" |
| 78 if c == '': # EOF | 86 # These can be curly braces, literal strings, function declarations and var |
| 79 return '\000' | 87 # declarations. (These last two must be on one line including the opening |
| 80 if c == '\r': | 88 # curly brace of the function for their variables to be renamed). |
| 81 return '\n' | 89 matched_text = m.group(0) |
| 82 return ' ' | 90 if matched_text == "{": |
| 83 | 91 self.Push() |
| 84 def _peek(self): | 92 return matched_text |
| 85 self.theLookahead = self._get() | 93 if matched_text == "}": |
| 86 return self.theLookahead | 94 self.Pop() |
| 87 | 95 return matched_text |
| 88 def _next(self): | 96 if re.match("[\"'/]", matched_text): |
| 89 """get the next character, excluding comments. peek() is used to see | 97 return matched_text |
| 90 if an unescaped '/' is followed by a '/' or '*'. | 98 m = re.match(r"var ", matched_text) |
| 91 """ | 99 if m: |
| 92 c = self._get() | 100 var_names = matched_text[m.end():] |
| 93 if c == '/' and self.theA != '\\': | 101 var_names = re.split(r",", var_names) |
| 94 p = self._peek() | 102 return "var " + ",".join(map(self.FindNewName, var_names)) |
| 95 if p == '/': | 103 m = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text) |
| 96 c = self._get() | 104 if m: |
| 97 while c > '\n': | 105 up_to_args = m.group(1) |
| 98 c = self._get() | 106 args = m.group(2) |
| 99 return c | 107 args = re.split(r",", args) |
| 100 if p == '*': | 108 self.Push() |
| 101 c = self._get() | 109 return up_to_args + "(" + ",".join(map(self.FindNewName, args)) + "){" |
| 102 while 1: | 110 |
| 103 c = self._get() | 111 if matched_text in self.map: |
| 104 if c == '*': | 112 return self.map[matched_text] |
| 105 if self._peek() == '/': | 113 |
| 106 self._get() | 114 return matched_text |
| 107 return ' ' | 115 |
| 108 if c == '\000': | 116 def CharFromNumber(self, number): |
| 109 raise UnterminatedComment() | 117 """A single-digit base-52 encoding using a-zA-Z.""" |
| 110 | 118 if number < 26: |
| 111 return c | 119 return chr(number + 97) |
| 112 | 120 number -= 26 |
| 113 def _action(self, action): | 121 return chr(number + 65) |
| 114 """do something! What you do is determined by the argument: | 122 |
| 115 1 Output A. Copy B to A. Get the next B. | 123 def FindNewName(self, var_name): |
| 116 2 Copy B to A. Get the next B. (Delete A). | 124 """Finds a new 1-character or 2-character name for a variable.""" |
| 117 3 Get the next B. (Delete B). | 125 # Enters it into the mapping table for this scope. |
|
Christian Plesner Hansen
2009/09/23 12:19:35
Why is this not part of the docstring?
| |
| 118 action treats a string as a single character. Wow! | 126 new_identifier = "" |
| 119 action recognizes a regular expression if it is preceded by ( or , or =. | 127 # Variable names that end in _ are member variables of the global object, |
| 120 """ | 128 # so they can be visible from code in a different scope. We leave them |
| 121 if action <= 1: | 129 # alone. |
| 122 self._outA() | 130 if var_name in self.map: |
| 123 | 131 return self.map[var_name] |
| 124 if action <= 2: | 132 if self.nesting == 0: |
| 125 self.theA = self.theB | 133 return var_name |
| 126 if self.theA == "'" or self.theA == '"': | 134 while True: |
| 127 while 1: | 135 identifier_first_char = self.identifier_counter % 52 |
| 128 self._outA() | 136 identifier_second_char = self.identifier_counter / 52 |
| 129 self.theA = self._get() | 137 new_identifier = self.CharFromNumber(identifier_first_char) |
| 130 if self.theA == self.theB: | 138 if identifier_second_char != 0: |
| 131 break | 139 new_identifier = ( |
| 132 if self.theA <= '\n': | 140 self.CharFromNumber(identifier_second_char - 1) + new_identifier) |
| 133 raise UnterminatedStringLiteral() | 141 self.identifier_counter += 1 |
| 134 if self.theA == '\\': | 142 if not new_identifier in self.seen_identifiers: |
| 135 self._outA() | 143 break |
| 136 self.theA = self._get() | 144 |
| 137 | 145 self.map[var_name] = new_identifier |
| 138 | 146 return new_identifier |
| 139 if action <= 3: | 147 |
| 140 self.theB = self._next() | 148 def RemoveSpaces(self, m): |
| 141 if self.theB == '/' and (self.theA == '(' or self.theA == ',' or | 149 """Returns literal strings unchanged, replaces other inputs with group 1.""" |
| 142 self.theA == '=' or self.theA == ':' or | 150 # Other inputs are replaced with the contents of capture 1. This is either |
| 143 self.theA == '[' or self.theA == '?' or | 151 # a single space or an empty string. |
| 144 self.theA == '!' or self.theA == '&' or | 152 entire_match = m.group(0) |
| 145 self.theA == '|' or self.theA == ';' or | 153 replacement = m.group(1) |
| 146 self.theA == '{' or self.theA == '}' or | 154 if re.match(r"'.*'$", entire_match): |
| 147 self.theA == '\n'): | 155 return entire_match |
| 148 self._outA() | 156 if re.match(r'".*"$', entire_match): |
| 149 self._outB() | 157 return entire_match |
| 150 while 1: | 158 if re.match(r"/.+/$", entire_match): |
| 151 self.theA = self._get() | 159 return entire_match |
| 152 if self.theA == '/': | 160 return replacement |
| 153 break | 161 |
| 154 elif self.theA == '\\': | 162 def JSMinify(self, text): |
| 155 self._outA() | 163 """The main entry point. Takes a text and returns a compressed version.""" |
| 156 self.theA = self._get() | 164 # The compressed version hopefully does the same thing. Line breaks are |
| 157 elif self.theA <= '\n': | 165 # preserved. |
| 158 raise UnterminatedRegularExpression() | 166 new_lines = [] |
| 159 self._outA() | 167 for line in re.split(r"\n", text): |
| 160 self.theB = self._next() | 168 line = line.replace("\t", " ") |
| 161 | 169 if self.in_comment: |
| 162 | 170 m = re.search(r"\*/", line) |
| 163 def _jsmin(self): | 171 if m: |
| 164 """Copy the input to the output, deleting the characters which are | 172 line = line[m.end():] |
| 165 insignificant to JavaScript. Comments will be removed. Tabs will be | 173 self.in_comment = False |
| 166 replaced with spaces. Carriage returns will be replaced with linefeeds. | 174 else: |
| 167 Most spaces and linefeeds will be removed. | 175 new_lines.append("") |
| 168 """ | 176 continue |
| 169 self.theA = '\n' | 177 |
| 170 self._action(3) | 178 if not self.in_comment: |
| 171 | 179 line = re.sub(r"/\*.*?\*/", " ", line) |
| 172 while self.theA != '\000': | 180 line = re.sub(r"//.*", "", line) |
| 173 if self.theA == ' ': | 181 m = re.search(r"/\*", line) |
| 174 if isAlphanum(self.theB): | 182 if m: |
| 175 self._action(1) | 183 line = line[:m.start()] |
| 176 else: | 184 self.in_comment = True |
| 177 self._action(2) | 185 |
| 178 elif self.theA == '\n': | 186 # Strip leading and trailing spaces. |
| 179 if self.theB in ['{', '[', '(', '+', '-']: | 187 line = re.sub(r"^ +", "", line) |
| 180 self._action(1) | 188 line = re.sub(r" +$", "", line) |
| 181 elif self.theB == ' ': | 189 # A regexp that matches a literal string surrounded by "double quotes". |
| 182 self._action(3) | 190 # This regexp can handle embedded backslash-escaped characters including |
| 183 else: | 191 # embedded backslash-escaped double quotes. |
| 184 if isAlphanum(self.theB): | 192 double_quoted_string = r'"(?:[^"\\]|\\.)*"' |
| 185 self._action(1) | 193 # A regexp that matches a literal string surrounded by 'single quotes'. |
| 186 else: | 194 single_quoted_string = r"'(?:[^'\\]|\\.)*'" |
| 187 self._action(2) | 195 # A regexp that matches a regexp literal surrounded by /slashes/. |
| 188 else: | 196 slash_quoted_regexp = r"/(?:[^/\\]|\\.)+/" |
| 189 if self.theB == ' ': | 197 # Replace multiple spaces with a single space. |
| 190 if isAlphanum(self.theA): | 198 line = re.sub("|".join([double_quoted_string, |
| 191 self._action(1) | 199 single_quoted_string, |
| 192 else: | 200 slash_quoted_regexp, |
| 193 self._action(3) | 201 "( )+"]), |
| 194 elif self.theB == '\n': | 202 self.RemoveSpaces, |
| 195 if self.theA in ['}', ']', ')', '+', '-', '"', '\'']: | 203 line) |
| 196 self._action(1) | 204 # Strip single spaces unless they have an identifier character both before |
| 197 else: | 205 # and after the space. % and $ are counted as identifier characters. |
| 198 if isAlphanum(self.theA): | 206 line = re.sub("|".join([double_quoted_string, |
| 199 self._action(1) | 207 single_quoted_string, |
| 200 else: | 208 slash_quoted_regexp, |
| 201 self._action(3) | 209 r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()"]), |
| 202 else: | 210 self.RemoveSpaces, |
| 203 self._action(1) | 211 line) |
| 204 | 212 # Collect keywords and identifiers that are already in use. |
| 205 def minify(self, instream, outstream): | 213 if self.nesting == 0: |
| 206 self.instream = instream | 214 re.sub(r"([a-zA-Z0-9_$%]+)", self.LookAtIdentifier, line) |
| 207 self.outstream = outstream | 215 function_declaration_regexp = ( |
| 208 self.theA = '\n' | 216 r"\bfunction" # Function definition keyword... |
| 209 self.theB = None | 217 r"( [\w$%]+)?" # ...optional function name... |
| 210 self.theLookahead = None | 218 r"\([\w$%,]+\)\{") # ...argument declarations. |
| 211 | 219 # Unfortunately the keyword-value syntax { key:value } makes the key look |
| 212 self._jsmin() | 220 # like a variable where in fact it is a literal string. We use the |
| 213 self.instream.close() | 221 # presence or absence of a question mark to try to distinguish between |
| 214 | 222 # this case and the ternary operator: "condition ? iftrue : iffalse". |
| 215 if __name__ == '__main__': | 223 if re.search(r"\?", line): |
| 216 import sys | 224 block_trailing_colon = r"" |
| 217 jsm = JavascriptMinify() | 225 else: |
| 218 jsm.minify(sys.stdin, sys.stdout) | 226 block_trailing_colon = r"(?![:\w$%])" |
| 227 # Variable use. Cannot follow a period or precede a colon. | |
| 228 variable_use_regexp = r"(?<![.\w$%])[\w$%]+" + block_trailing_colon | |
| 229 line = re.sub("|".join([double_quoted_string, | |
| 230 single_quoted_string, | |
| 231 slash_quoted_regexp, | |
| 232 r"\{", # Curly braces. | |
| 233 r"\}", | |
| 234 r"\bvar [\w$%,]+", # var declarations. | |
| 235 function_declaration_regexp, | |
| 236 variable_use_regexp]), | |
| 237 self.Declaration, | |
| 238 line) | |
| 239 new_lines.append(line) | |
| 240 | |
| 241 return "\n".join(new_lines) + "\n" | |
| OLD | NEW |