Chromium Code Reviews| Index: tools/jsmin.py |
| =================================================================== |
| --- tools/jsmin.py (revision 2941) |
| +++ tools/jsmin.py (working copy) |
| @@ -1,218 +1,241 @@ |
| -#!/usr/bin/python |
| - |
| -# This code is original from jsmin by Douglas Crockford, it was translated to |
| -# Python by Baruch Even. The original code had the following copyright and |
| -# license. |
| -# |
| -# /* jsmin.c |
| -# 2007-05-22 |
| -# |
| -# Copyright (c) 2002 Douglas Crockford (www.crockford.com) |
| -# |
| -# Permission is hereby granted, free of charge, to any person obtaining a copy of |
| -# this software and associated documentation files (the "Software"), to deal in |
| -# the Software without restriction, including without limitation the rights to |
| -# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies |
| -# of the Software, and to permit persons to whom the Software is furnished to do |
| -# so, subject to the following conditions: |
| -# |
| -# The above copyright notice and this permission notice shall be included in all |
| -# copies or substantial portions of the Software. |
| -# |
| -# The Software shall be used for Good, not Evil. |
| -# |
| -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| -# SOFTWARE. |
| -# */ |
| - |
| -from StringIO import StringIO |
| - |
| -def jsmin(js): |
| - ins = StringIO(js) |
| - outs = StringIO() |
| - JavascriptMinify().minify(ins, outs) |
| - str = outs.getvalue() |
| - if len(str) > 0 and str[0] == '\n': |
| - str = str[1:] |
| - return str |
| - |
| -def isAlphanum(c): |
| - """return true if the character is a letter, digit, underscore, |
| - dollar sign, or non-ASCII character. |
| - """ |
| - return ((c >= 'a' and c <= 'z') or (c >= '0' and c <= '9') or |
| - (c >= 'A' and c <= 'Z') or c == '_' or c == '$' or c == '\\' or (c is not None and ord(c) > 126)); |
| - |
| -class UnterminatedComment(Exception): |
| - pass |
| - |
| -class UnterminatedStringLiteral(Exception): |
| - pass |
| - |
| -class UnterminatedRegularExpression(Exception): |
| - pass |
| - |
| -class JavascriptMinify(object): |
| - |
| - def _outA(self): |
| - self.outstream.write(self.theA) |
| - def _outB(self): |
| - self.outstream.write(self.theB) |
| - |
| - def _get(self): |
| - """return the next character from stdin. Watch out for lookahead. If |
| - the character is a control character, translate it to a space or |
| - linefeed. |
| - """ |
| - c = self.theLookahead |
| - self.theLookahead = None |
| - if c == None: |
| - c = self.instream.read(1) |
| - if c >= ' ' or c == '\n': |
| - return c |
| - if c == '': # EOF |
| - return '\000' |
| - if c == '\r': |
| - return '\n' |
| - return ' ' |
| - |
| - def _peek(self): |
| - self.theLookahead = self._get() |
| - return self.theLookahead |
| - |
| - def _next(self): |
| - """get the next character, excluding comments. peek() is used to see |
| - if an unescaped '/' is followed by a '/' or '*'. |
| - """ |
| - c = self._get() |
| - if c == '/' and self.theA != '\\': |
| - p = self._peek() |
| - if p == '/': |
| - c = self._get() |
| - while c > '\n': |
| - c = self._get() |
| - return c |
| - if p == '*': |
| - c = self._get() |
| - while 1: |
| - c = self._get() |
| - if c == '*': |
| - if self._peek() == '/': |
| - self._get() |
| - return ' ' |
| - if c == '\000': |
| - raise UnterminatedComment() |
| - |
| - return c |
| - |
| - def _action(self, action): |
| - """do something! What you do is determined by the argument: |
| - 1 Output A. Copy B to A. Get the next B. |
| - 2 Copy B to A. Get the next B. (Delete A). |
| - 3 Get the next B. (Delete B). |
| - action treats a string as a single character. Wow! |
| - action recognizes a regular expression if it is preceded by ( or , or =. |
| - """ |
| - if action <= 1: |
| - self._outA() |
| - |
| - if action <= 2: |
| - self.theA = self.theB |
| - if self.theA == "'" or self.theA == '"': |
| - while 1: |
| - self._outA() |
| - self.theA = self._get() |
| - if self.theA == self.theB: |
| - break |
| - if self.theA <= '\n': |
| - raise UnterminatedStringLiteral() |
| - if self.theA == '\\': |
| - self._outA() |
| - self.theA = self._get() |
| - |
| - |
| - if action <= 3: |
| - self.theB = self._next() |
| - if self.theB == '/' and (self.theA == '(' or self.theA == ',' or |
| - self.theA == '=' or self.theA == ':' or |
| - self.theA == '[' or self.theA == '?' or |
| - self.theA == '!' or self.theA == '&' or |
| - self.theA == '|' or self.theA == ';' or |
| - self.theA == '{' or self.theA == '}' or |
| - self.theA == '\n'): |
| - self._outA() |
| - self._outB() |
| - while 1: |
| - self.theA = self._get() |
| - if self.theA == '/': |
| - break |
| - elif self.theA == '\\': |
| - self._outA() |
| - self.theA = self._get() |
| - elif self.theA <= '\n': |
| - raise UnterminatedRegularExpression() |
| - self._outA() |
| - self.theB = self._next() |
| - |
| - |
| - def _jsmin(self): |
| - """Copy the input to the output, deleting the characters which are |
| - insignificant to JavaScript. Comments will be removed. Tabs will be |
| - replaced with spaces. Carriage returns will be replaced with linefeeds. |
| - Most spaces and linefeeds will be removed. |
| - """ |
| - self.theA = '\n' |
| - self._action(3) |
| - |
| - while self.theA != '\000': |
| - if self.theA == ' ': |
| - if isAlphanum(self.theB): |
| - self._action(1) |
| - else: |
| - self._action(2) |
| - elif self.theA == '\n': |
| - if self.theB in ['{', '[', '(', '+', '-']: |
| - self._action(1) |
| - elif self.theB == ' ': |
| - self._action(3) |
| - else: |
| - if isAlphanum(self.theB): |
| - self._action(1) |
| - else: |
| - self._action(2) |
| - else: |
| - if self.theB == ' ': |
| - if isAlphanum(self.theA): |
| - self._action(1) |
| - else: |
| - self._action(3) |
| - elif self.theB == '\n': |
| - if self.theA in ['}', ']', ')', '+', '-', '"', '\'']: |
| - self._action(1) |
| - else: |
| - if isAlphanum(self.theA): |
| - self._action(1) |
| - else: |
| - self._action(3) |
| - else: |
| - self._action(1) |
| - |
| - def minify(self, instream, outstream): |
| - self.instream = instream |
| - self.outstream = outstream |
| - self.theA = '\n' |
| - self.theB = None |
| - self.theLookahead = None |
| - |
| - self._jsmin() |
| - self.instream.close() |
| - |
| -if __name__ == '__main__': |
| - import sys |
| - jsm = JavascriptMinify() |
| - jsm.minify(sys.stdin, sys.stdout) |
| +#!/usr/bin/python2.4 |
| + |
| +# Copyright 2009 the V8 project authors. All rights reserved. |
| +# Redistribution and use in source and binary forms, with or without |
| +# modification, are permitted provided that the following conditions are |
| +# met: |
| +# |
| +# * Redistributions of source code must retain the above copyright |
| +# notice, this list of conditions and the following disclaimer. |
| +# * Redistributions in binary form must reproduce the above |
| +# copyright notice, this list of conditions and the following |
| +# disclaimer in the documentation and/or other materials provided |
| +# with the distribution. |
| +# * Neither the name of Google Inc. nor the names of its |
| +# contributors may be used to endorse or promote products derived |
| +# from this software without specific prior written permission. |
| +# |
| +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| + |
| +# Suppress copyright warning: pylint: disable-msg=C6304 |
|
Christian Plesner Hansen
2009/09/23 12:19:35
I would suggest removing this and just letting pyl
|
| + |
| +"""A JavaScript minifier. |
| + |
| +It is far from being a complete JS parser, so there are many valid |
| +JavaScript programs that will be ruined by it. Another strangeness is that |
| +it accepts $ and % as parts of identifiers. It doesn't merge lines or strip |
| +out blank lines in order to ease debugging. Variables at the top scope are |
| +properties of the global object so we can't rename them. It is assumed that |
| +you introduce variables with var as if JavaScript followed C++ scope rules |
| +around curly braces, so the declaration must be above the first use. |
| + |
| +Use as: |
| +import jsmin |
| +minifier = jsmin.JavaScriptMinifier() |
| +program1 = minifier.JSMinify(program1) |
| +program2 = minifier.JSMinify(program2) |
| +""" |
| + |
| +import re |
| + |
| + |
| +class JavaScriptMinifier(object): |
| + """An object that you can feed code snippets to in order to get them minified.""" |
| + |
| + def __init__(self): |
| + # We prepopulate the list of identifiers that shouldn't be used. These |
| + # short language keywords could otherwise be used by the script as variable |
| + # names. |
| + self.seen_identifiers = {"do": True, "in": True} |
| + self.identifier_counter = 0 |
| + self.in_comment = False |
| + self.map = {} |
| + self.nesting = 0 |
| + |
| + def LookAtIdentifier(self, m): |
| + """Records identifiers or keywords that we see in use.""" |
| + # (So we can avoid renaming variables to these strings.) |
| + identifier = m.group(1) |
| + self.seen_identifiers[identifier] = True |
| + |
| + def Push(self): |
| + """Called when we encounter a '{'.""" |
| + self.nesting += 1 |
| + |
| + def Pop(self): |
| + """Called when we encounter a '}'.""" |
| + self.nesting -= 1 |
| + # We treat each top-level opening brace as a single scope that can span |
| + # several sets of nested braces. |
| + if self.nesting == 0: |
| + self.map = {} |
| + self.identifier_counter = 0 |
| + |
| + def Declaration(self, m): |
| + """Rewrites bits of the program selected by a regexp.""" |
| + # These can be curly braces, literal strings, function declarations and var |
| + # declarations. (These last two must be on one line including the opening |
| + # curly brace of the function for their variables to be renamed). |
| + matched_text = m.group(0) |
| + if matched_text == "{": |
| + self.Push() |
| + return matched_text |
| + if matched_text == "}": |
| + self.Pop() |
| + return matched_text |
| + if re.match("[\"'/]", matched_text): |
| + return matched_text |
| + m = re.match(r"var ", matched_text) |
| + if m: |
| + var_names = matched_text[m.end():] |
| + var_names = re.split(r",", var_names) |
| + return "var " + ",".join(map(self.FindNewName, var_names)) |
| + m = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text) |
| + if m: |
| + up_to_args = m.group(1) |
| + args = m.group(2) |
| + args = re.split(r",", args) |
| + self.Push() |
| + return up_to_args + "(" + ",".join(map(self.FindNewName, args)) + "){" |
| + |
| + if matched_text in self.map: |
| + return self.map[matched_text] |
| + |
| + return matched_text |
| + |
| + def CharFromNumber(self, number): |
| + """A single-digit base-52 encoding using a-zA-Z.""" |
| + if number < 26: |
| + return chr(number + 97) |
| + number -= 26 |
| + return chr(number + 65) |
| + |
| + def FindNewName(self, var_name): |
| + """Finds a new 1-character or 2-character name for a variable.""" |
| + # Enters it into the mapping table for this scope. |
|
Christian Plesner Hansen
2009/09/23 12:19:35
Why is this not part of the docstring?
|
| + new_identifier = "" |
| + # Variable names that end in _ are member variables of the global object, |
| + # so they can be visible from code in a different scope. We leave them |
| + # alone. |
| + if var_name in self.map: |
| + return self.map[var_name] |
| + if self.nesting == 0: |
| + return var_name |
| + while True: |
| + identifier_first_char = self.identifier_counter % 52 |
| + identifier_second_char = self.identifier_counter / 52 |
| + new_identifier = self.CharFromNumber(identifier_first_char) |
| + if identifier_second_char != 0: |
| + new_identifier = ( |
| + self.CharFromNumber(identifier_second_char - 1) + new_identifier) |
| + self.identifier_counter += 1 |
| + if not new_identifier in self.seen_identifiers: |
| + break |
| + |
| + self.map[var_name] = new_identifier |
| + return new_identifier |
| + |
| + def RemoveSpaces(self, m): |
| + """Returns literal strings unchanged, replaces other inputs with group 1.""" |
| + # Other inputs are replaced with the contents of capture 1. This is either |
| + # a single space or an empty string. |
| + entire_match = m.group(0) |
| + replacement = m.group(1) |
| + if re.match(r"'.*'$", entire_match): |
| + return entire_match |
| + if re.match(r'".*"$', entire_match): |
| + return entire_match |
| + if re.match(r"/.+/$", entire_match): |
| + return entire_match |
| + return replacement |
| + |
| + def JSMinify(self, text): |
| + """The main entry point. Takes a text and returns a compressed version.""" |
| + # The compressed version hopefully does the same thing. Line breaks are |
| + # preserved. |
| + new_lines = [] |
| + for line in re.split(r"\n", text): |
| + line = line.replace("\t", " ") |
| + if self.in_comment: |
| + m = re.search(r"\*/", line) |
| + if m: |
| + line = line[m.end():] |
| + self.in_comment = False |
| + else: |
| + new_lines.append("") |
| + continue |
| + |
| + if not self.in_comment: |
| + line = re.sub(r"/\*.*?\*/", " ", line) |
| + line = re.sub(r"//.*", "", line) |
| + m = re.search(r"/\*", line) |
| + if m: |
| + line = line[:m.start()] |
| + self.in_comment = True |
| + |
| + # Strip leading and trailing spaces. |
| + line = re.sub(r"^ +", "", line) |
| + line = re.sub(r" +$", "", line) |
| + # A regexp that matches a literal string surrounded by "double quotes". |
| + # This regexp can handle embedded backslash-escaped characters including |
| + # embedded backslash-escaped double quotes. |
| + double_quoted_string = r'"(?:[^"\\]|\\.)*"' |
| + # A regexp that matches a literal string surrounded by 'single quotes'. |
| + single_quoted_string = r"'(?:[^'\\]|\\.)*'" |
| + # A regexp that matches a regexp literal surrounded by /slashes/. |
| + slash_quoted_regexp = r"/(?:[^/\\]|\\.)+/" |
| + # Replace multiple spaces with a single space. |
| + line = re.sub("|".join([double_quoted_string, |
| + single_quoted_string, |
| + slash_quoted_regexp, |
| + "( )+"]), |
| + self.RemoveSpaces, |
| + line) |
| + # Strip single spaces unless they have an identifier character both before |
| + # and after the space. % and $ are counted as identifier characters. |
| + line = re.sub("|".join([double_quoted_string, |
| + single_quoted_string, |
| + slash_quoted_regexp, |
| + r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()"]), |
| + self.RemoveSpaces, |
| + line) |
| + # Collect keywords and identifiers that are already in use. |
| + if self.nesting == 0: |
| + re.sub(r"([a-zA-Z0-9_$%]+)", self.LookAtIdentifier, line) |
| + function_declaration_regexp = ( |
| + r"\bfunction" # Function definition keyword... |
| + r"( [\w$%]+)?" # ...optional function name... |
| + r"\([\w$%,]+\)\{") # ...argument declarations. |
| + # Unfortunately the keyword-value syntax { key:value } makes the key look |
| + # like a variable where in fact it is a literal string. We use the |
| + # presence or absence of a question mark to try to distinguish between |
| + # this case and the ternary operator: "condition ? iftrue : iffalse". |
| + if re.search(r"\?", line): |
| + block_trailing_colon = r"" |
| + else: |
| + block_trailing_colon = r"(?![:\w$%])" |
| + # Variable use. Cannot follow a period or precede a colon. |
| + variable_use_regexp = r"(?<![.\w$%])[\w$%]+" + block_trailing_colon |
| + line = re.sub("|".join([double_quoted_string, |
| + single_quoted_string, |
| + slash_quoted_regexp, |
| + r"\{", # Curly braces. |
| + r"\}", |
| + r"\bvar [\w$%,]+", # var declarations. |
| + function_declaration_regexp, |
| + variable_use_regexp]), |
| + self.Declaration, |
| + line) |
| + new_lines.append(line) |
| + |
| + return "\n".join(new_lines) + "\n" |